mirror of https://github.com/go-gitea/gitea
Search bar for issues/pulls (#530)
parent
8bc431952f
commit
833f8b94c2
@ -0,0 +1,183 @@ |
||||
// Copyright 2017 The Gitea Authors. All rights reserved.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package models |
||||
|
||||
import ( |
||||
"fmt" |
||||
"os" |
||||
"strconv" |
||||
"strings" |
||||
|
||||
"code.gitea.io/gitea/modules/log" |
||||
"code.gitea.io/gitea/modules/setting" |
||||
"code.gitea.io/gitea/modules/util" |
||||
"github.com/blevesearch/bleve" |
||||
"github.com/blevesearch/bleve/analysis/analyzer/simple" |
||||
"github.com/blevesearch/bleve/search/query" |
||||
) |
||||
|
||||
// issueIndexerUpdateQueue is the buffered queue of issues awaiting
// (re-)indexing; it is created in InitIssueIndexer and drained by
// processIssueIndexerUpdateQueue.
var issueIndexerUpdateQueue chan *Issue

// issueIndexer (thread-safe) index for searching issues.
var issueIndexer bleve.Index
||||
|
||||
// issueIndexerData is the document stored in the issue indexer for each
// issue; it is the searchable subset of an Issue (see (*Issue).issueData).
type issueIndexerData struct {
	ID     int64 // issue primary key
	RepoID int64 // repository the issue belongs to; used to scope searches

	Title   string
	Content string
}
||||
|
||||
// numericQuery an numeric-equality query for the given value and field
|
||||
func numericQuery(value int64, field string) *query.NumericRangeQuery { |
||||
f := float64(value) |
||||
tru := true |
||||
q := bleve.NewNumericRangeInclusiveQuery(&f, &f, &tru, &tru) |
||||
q.SetField(field) |
||||
return q |
||||
} |
||||
|
||||
// SearchIssuesByKeyword searches for issues by given conditions.
|
||||
// Returns the matching issue IDs
|
||||
func SearchIssuesByKeyword(repoID int64, keyword string) ([]int64, error) { |
||||
fields := strings.Fields(strings.ToLower(keyword)) |
||||
indexerQuery := bleve.NewConjunctionQuery( |
||||
numericQuery(repoID, "RepoID"), |
||||
bleve.NewDisjunctionQuery( |
||||
bleve.NewPhraseQuery(fields, "Title"), |
||||
bleve.NewPhraseQuery(fields, "Content"), |
||||
)) |
||||
search := bleve.NewSearchRequestOptions(indexerQuery, 2147483647, 0, false) |
||||
search.Fields = []string{"ID"} |
||||
|
||||
result, err := issueIndexer.Search(search) |
||||
if err != nil { |
||||
return nil, err |
||||
} |
||||
|
||||
issueIDs := make([]int64, len(result.Hits)) |
||||
for i, hit := range result.Hits { |
||||
issueIDs[i] = int64(hit.Fields["ID"].(float64)) |
||||
} |
||||
return issueIDs, nil |
||||
} |
||||
|
||||
// InitIssueIndexer initialize issue indexer
|
||||
func InitIssueIndexer() { |
||||
_, err := os.Stat(setting.Indexer.IssuePath) |
||||
if err != nil { |
||||
if os.IsNotExist(err) { |
||||
if err = createIssueIndexer(); err != nil { |
||||
log.Fatal(4, "CreateIssuesIndexer: %v", err) |
||||
} |
||||
if err = populateIssueIndexer(); err != nil { |
||||
log.Fatal(4, "PopulateIssuesIndex: %v", err) |
||||
} |
||||
} else { |
||||
log.Fatal(4, "InitIssuesIndexer: %v", err) |
||||
} |
||||
} else { |
||||
issueIndexer, err = bleve.Open(setting.Indexer.IssuePath) |
||||
if err != nil { |
||||
log.Fatal(4, "InitIssuesIndexer, open index: %v", err) |
||||
} |
||||
} |
||||
issueIndexerUpdateQueue = make(chan *Issue, setting.Indexer.UpdateQueueLength) |
||||
go processIssueIndexerUpdateQueue() |
||||
// TODO close issueIndexer when Gitea closes
|
||||
} |
||||
|
||||
// createIssueIndexer create an issue indexer if one does not already exist
|
||||
func createIssueIndexer() error { |
||||
mapping := bleve.NewIndexMapping() |
||||
docMapping := bleve.NewDocumentMapping() |
||||
|
||||
docMapping.AddFieldMappingsAt("ID", bleve.NewNumericFieldMapping()) |
||||
docMapping.AddFieldMappingsAt("RepoID", bleve.NewNumericFieldMapping()) |
||||
|
||||
textFieldMapping := bleve.NewTextFieldMapping() |
||||
textFieldMapping.Analyzer = simple.Name |
||||
docMapping.AddFieldMappingsAt("Title", textFieldMapping) |
||||
docMapping.AddFieldMappingsAt("Content", textFieldMapping) |
||||
|
||||
mapping.AddDocumentMapping("issues", docMapping) |
||||
|
||||
var err error |
||||
issueIndexer, err = bleve.New(setting.Indexer.IssuePath, mapping) |
||||
return err |
||||
} |
||||
|
||||
// populateIssueIndexer walks every repository in the database and indexes
// all of its issues (open and closed, issues and pulls). One bleve batch
// is committed per page of repositories. Returns nil when all repositories
// have been processed, or the first error encountered.
func populateIssueIndexer() error {
	for page := 1; ; page++ {
		// Fetch repositories a page (10) at a time to bound memory use.
		repos, err := Repositories(&SearchRepoOptions{
			Page:     page,
			PageSize: 10,
		})
		if err != nil {
			return fmt.Errorf("Repositories: %v", err)
		}
		if len(repos) == 0 {
			// Past the last page: everything is indexed.
			return nil
		}
		batch := issueIndexer.NewBatch()
		for _, repo := range repos {
			// OptionalBoolNone on IsClosed/IsPull means "no filter":
			// index every issue and pull request of the repository.
			issues, err := Issues(&IssuesOptions{
				RepoID:   repo.ID,
				IsClosed: util.OptionalBoolNone,
				IsPull:   util.OptionalBoolNone,
				Page:     -1, // do not page
			})
			if err != nil {
				return fmt.Errorf("Issues: %v", err)
			}
			for _, issue := range issues {
				err = batch.Index(issue.indexUID(), issue.issueData())
				if err != nil {
					return fmt.Errorf("batch.Index: %v", err)
				}
			}
		}
		// Commit the accumulated updates for this page of repositories.
		if err = issueIndexer.Batch(batch); err != nil {
			return fmt.Errorf("index.Batch: %v", err)
		}
	}
}
||||
|
||||
func processIssueIndexerUpdateQueue() { |
||||
for { |
||||
select { |
||||
case issue := <-issueIndexerUpdateQueue: |
||||
if err := issueIndexer.Index(issue.indexUID(), issue.issueData()); err != nil { |
||||
log.Error(4, "issuesIndexer.Index: %v", err) |
||||
} |
||||
} |
||||
} |
||||
} |
||||
|
||||
// indexUID a unique identifier for an issue used in full-text indices.
// The ID is encoded in base 36 to keep index keys short; since issue IDs
// are unique, the encoded strings are too.
func (issue *Issue) indexUID() string {
	return strconv.FormatInt(issue.ID, 36)
}
||||
|
||||
// issueData extracts the searchable subset of an issue's fields into the
// document form stored in the full-text index (see issueIndexerData).
func (issue *Issue) issueData() *issueIndexerData {
	return &issueIndexerData{
		ID:      issue.ID,
		RepoID:  issue.RepoID,
		Title:   issue.Title,
		Content: issue.Content,
	}
}
||||
|
||||
// UpdateIssueIndexer add/update an issue to the issue indexer.
// The send runs on a fresh goroutine so callers never block, even when the
// buffered queue is full.
// NOTE(review): this spawns one goroutine per update; when the queue is
// saturated the number of pending goroutines is unbounded — confirm this is
// acceptable under heavy issue-update load.
func UpdateIssueIndexer(issue *Issue) {
	go func() {
		issueIndexerUpdateQueue <- issue
	}()
}
@ -0,0 +1,14 @@ |
||||
// Copyright 2016 The Gitea Authors. All rights reserved.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package indexer |
||||
|
||||
import ( |
||||
"code.gitea.io/gitea/models" |
||||
) |
||||
|
||||
// NewContext start indexer service.
// Currently the only indexer is the issue indexer, which is initialized
// (opened or created-and-populated) here.
func NewContext() {
	models.InitIssueIndexer()
}
@ -0,0 +1,25 @@ |
||||
// Copyright 2017 The Gitea Authors. All rights reserved.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package util |
||||
|
||||
// OptionalBool is a boolean that can be "null" (unset), allowing callers to
// distinguish "no filter" from an explicit true/false.
type OptionalBool byte

const (
	// OptionalBoolNone a "null" boolean value.
	// The explicit OptionalBool type on the first constant makes the whole
	// iota group typed OptionalBool rather than untyped int.
	OptionalBoolNone OptionalBool = iota
	// OptionalBoolTrue a "true" boolean value
	OptionalBoolTrue
	// OptionalBoolFalse a "false" boolean value
	OptionalBoolFalse
)

// OptionalBoolOf get the corresponding OptionalBool of a bool.
func OptionalBoolOf(b bool) OptionalBool {
	if b {
		return OptionalBoolTrue
	}
	return OptionalBoolFalse
}
@ -1,4 +1,4 @@ |
||||
<div class="ui compact small menu"> |
||||
<div class="ui compact left small menu"> |
||||
<a class="{{if .PageIsLabels}}active{{end}} item" href="{{.RepoLink}}/labels">{{.i18n.Tr "repo.labels"}}</a> |
||||
<a class="{{if .PageIsMilestones}}active{{end}} item" href="{{.RepoLink}}/milestones">{{.i18n.Tr "repo.milestones"}}</a> |
||||
</div> |
||||
|
@ -0,0 +1,13 @@ |
||||
<form class="ui form"> |
||||
<div class="ui fluid action input"> |
||||
<input type="hidden" name="type" value="{{$.ViewType}}"/> |
||||
<input type="hidden" name="state" value="{{$.State}}"/> |
||||
<input type="hidden" name="labels" value="{{.SelectLabels}}"/> |
||||
<input type="hidden" name="milestone" value="{{$.MilestoneID}}"/> |
||||
<input type="hidden" name="assignee" value="{{$.AssigneeID}}"/> |
||||
<div class="ui search action input"> |
||||
<input name="q" value="{{.Keyword}}" placeholder="{{.i18n.Tr "explore.search"}}..." autofocus> |
||||
</div> |
||||
<button class="ui blue button" type="submit">{{.i18n.Tr "explore.search"}}</button> |
||||
</div> |
||||
</form> |
@ -0,0 +1,16 @@ |
||||
# Contributing to Bleve |
||||
|
||||
We look forward to your contributions, but ask that you first review these guidelines. |
||||
|
||||
### Sign the CLA |
||||
|
||||
As Bleve is a Couchbase project we require contributors accept the [Couchbase Contributor License Agreement](http://review.couchbase.org/static/individual_agreement.html). To sign this agreement log into the Couchbase [code review tool](http://review.couchbase.org/). The Bleve project does not use this code review tool but it is still used to track acceptance of the contributor license agreements. |
||||
|
||||
### Submitting a Pull Request |
||||
|
||||
All types of contributions are welcome, but please keep the following in mind: |
||||
|
||||
- If you're planning a large change, you should really discuss it in a GitHub issue or on the Google group first. This helps avoid duplicate effort and spending time on something that may not be merged. |
||||
- Existing tests should continue to pass, new tests for the contribution are nice to have. |
||||
- All code should have gone through `go fmt` |
||||
- All code should pass `go vet` |
@ -0,0 +1,202 @@ |
||||
|
||||
Apache License |
||||
Version 2.0, January 2004 |
||||
http://www.apache.org/licenses/ |
||||
|
||||
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION |
||||
|
||||
1. Definitions. |
||||
|
||||
"License" shall mean the terms and conditions for use, reproduction, |
||||
and distribution as defined by Sections 1 through 9 of this document. |
||||
|
||||
"Licensor" shall mean the copyright owner or entity authorized by |
||||
the copyright owner that is granting the License. |
||||
|
||||
"Legal Entity" shall mean the union of the acting entity and all |
||||
other entities that control, are controlled by, or are under common |
||||
control with that entity. For the purposes of this definition, |
||||
"control" means (i) the power, direct or indirect, to cause the |
||||
direction or management of such entity, whether by contract or |
||||
otherwise, or (ii) ownership of fifty percent (50%) or more of the |
||||
outstanding shares, or (iii) beneficial ownership of such entity. |
||||
|
||||
"You" (or "Your") shall mean an individual or Legal Entity |
||||
exercising permissions granted by this License. |
||||
|
||||
"Source" form shall mean the preferred form for making modifications, |
||||
including but not limited to software source code, documentation |
||||
source, and configuration files. |
||||
|
||||
"Object" form shall mean any form resulting from mechanical |
||||
transformation or translation of a Source form, including but |
||||
not limited to compiled object code, generated documentation, |
||||
and conversions to other media types. |
||||
|
||||
"Work" shall mean the work of authorship, whether in Source or |
||||
Object form, made available under the License, as indicated by a |
||||
copyright notice that is included in or attached to the work |
||||
(an example is provided in the Appendix below). |
||||
|
||||
"Derivative Works" shall mean any work, whether in Source or Object |
||||
form, that is based on (or derived from) the Work and for which the |
||||
editorial revisions, annotations, elaborations, or other modifications |
||||
represent, as a whole, an original work of authorship. For the purposes |
||||
of this License, Derivative Works shall not include works that remain |
||||
separable from, or merely link (or bind by name) to the interfaces of, |
||||
the Work and Derivative Works thereof. |
||||
|
||||
"Contribution" shall mean any work of authorship, including |
||||
the original version of the Work and any modifications or additions |
||||
to that Work or Derivative Works thereof, that is intentionally |
||||
submitted to Licensor for inclusion in the Work by the copyright owner |
||||
or by an individual or Legal Entity authorized to submit on behalf of |
||||
the copyright owner. For the purposes of this definition, "submitted" |
||||
means any form of electronic, verbal, or written communication sent |
||||
to the Licensor or its representatives, including but not limited to |
||||
communication on electronic mailing lists, source code control systems, |
||||
and issue tracking systems that are managed by, or on behalf of, the |
||||
Licensor for the purpose of discussing and improving the Work, but |
||||
excluding communication that is conspicuously marked or otherwise |
||||
designated in writing by the copyright owner as "Not a Contribution." |
||||
|
||||
"Contributor" shall mean Licensor and any individual or Legal Entity |
||||
on behalf of whom a Contribution has been received by Licensor and |
||||
subsequently incorporated within the Work. |
||||
|
||||
2. Grant of Copyright License. Subject to the terms and conditions of |
||||
this License, each Contributor hereby grants to You a perpetual, |
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable |
||||
copyright license to reproduce, prepare Derivative Works of, |
||||
publicly display, publicly perform, sublicense, and distribute the |
||||
Work and such Derivative Works in Source or Object form. |
||||
|
||||
3. Grant of Patent License. Subject to the terms and conditions of |
||||
this License, each Contributor hereby grants to You a perpetual, |
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable |
||||
(except as stated in this section) patent license to make, have made, |
||||
use, offer to sell, sell, import, and otherwise transfer the Work, |
||||
where such license applies only to those patent claims licensable |
||||
by such Contributor that are necessarily infringed by their |
||||
Contribution(s) alone or by combination of their Contribution(s) |
||||
with the Work to which such Contribution(s) was submitted. If You |
||||
institute patent litigation against any entity (including a |
||||
cross-claim or counterclaim in a lawsuit) alleging that the Work |
||||
or a Contribution incorporated within the Work constitutes direct |
||||
or contributory patent infringement, then any patent licenses |
||||
granted to You under this License for that Work shall terminate |
||||
as of the date such litigation is filed. |
||||
|
||||
4. Redistribution. You may reproduce and distribute copies of the |
||||
Work or Derivative Works thereof in any medium, with or without |
||||
modifications, and in Source or Object form, provided that You |
||||
meet the following conditions: |
||||
|
||||
(a) You must give any other recipients of the Work or |
||||
Derivative Works a copy of this License; and |
||||
|
||||
(b) You must cause any modified files to carry prominent notices |
||||
stating that You changed the files; and |
||||
|
||||
(c) You must retain, in the Source form of any Derivative Works |
||||
that You distribute, all copyright, patent, trademark, and |
||||
attribution notices from the Source form of the Work, |
||||
excluding those notices that do not pertain to any part of |
||||
the Derivative Works; and |
||||
|
||||
(d) If the Work includes a "NOTICE" text file as part of its |
||||
distribution, then any Derivative Works that You distribute must |
||||
include a readable copy of the attribution notices contained |
||||
within such NOTICE file, excluding those notices that do not |
||||
pertain to any part of the Derivative Works, in at least one |
||||
of the following places: within a NOTICE text file distributed |
||||
as part of the Derivative Works; within the Source form or |
||||
documentation, if provided along with the Derivative Works; or, |
||||
within a display generated by the Derivative Works, if and |
||||
wherever such third-party notices normally appear. The contents |
||||
of the NOTICE file are for informational purposes only and |
||||
do not modify the License. You may add Your own attribution |
||||
notices within Derivative Works that You distribute, alongside |
||||
or as an addendum to the NOTICE text from the Work, provided |
||||
that such additional attribution notices cannot be construed |
||||
as modifying the License. |
||||
|
||||
You may add Your own copyright statement to Your modifications and |
||||
may provide additional or different license terms and conditions |
||||
for use, reproduction, or distribution of Your modifications, or |
||||
for any such Derivative Works as a whole, provided Your use, |
||||
reproduction, and distribution of the Work otherwise complies with |
||||
the conditions stated in this License. |
||||
|
||||
5. Submission of Contributions. Unless You explicitly state otherwise, |
||||
any Contribution intentionally submitted for inclusion in the Work |
||||
by You to the Licensor shall be under the terms and conditions of |
||||
this License, without any additional terms or conditions. |
||||
Notwithstanding the above, nothing herein shall supersede or modify |
||||
the terms of any separate license agreement you may have executed |
||||
with Licensor regarding such Contributions. |
||||
|
||||
6. Trademarks. This License does not grant permission to use the trade |
||||
names, trademarks, service marks, or product names of the Licensor, |
||||
except as required for reasonable and customary use in describing the |
||||
origin of the Work and reproducing the content of the NOTICE file. |
||||
|
||||
7. Disclaimer of Warranty. Unless required by applicable law or |
||||
agreed to in writing, Licensor provides the Work (and each |
||||
Contributor provides its Contributions) on an "AS IS" BASIS, |
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or |
||||
implied, including, without limitation, any warranties or conditions |
||||
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A |
||||
PARTICULAR PURPOSE. You are solely responsible for determining the |
||||
appropriateness of using or redistributing the Work and assume any |
||||
risks associated with Your exercise of permissions under this License. |
||||
|
||||
8. Limitation of Liability. In no event and under no legal theory, |
||||
whether in tort (including negligence), contract, or otherwise, |
||||
unless required by applicable law (such as deliberate and grossly |
||||
negligent acts) or agreed to in writing, shall any Contributor be |
||||
liable to You for damages, including any direct, indirect, special, |
||||
incidental, or consequential damages of any character arising as a |
||||
result of this License or out of the use or inability to use the |
||||
Work (including but not limited to damages for loss of goodwill, |
||||
work stoppage, computer failure or malfunction, or any and all |
||||
other commercial damages or losses), even if such Contributor |
||||
has been advised of the possibility of such damages. |
||||
|
||||
9. Accepting Warranty or Additional Liability. While redistributing |
||||
the Work or Derivative Works thereof, You may choose to offer, |
||||
and charge a fee for, acceptance of support, warranty, indemnity, |
||||
or other liability obligations and/or rights consistent with this |
||||
License. However, in accepting such obligations, You may act only |
||||
on Your own behalf and on Your sole responsibility, not on behalf |
||||
of any other Contributor, and only if You agree to indemnify, |
||||
defend, and hold each Contributor harmless for any liability |
||||
incurred by, or claims asserted against, such Contributor by reason |
||||
of your accepting any such warranty or additional liability. |
||||
|
||||
END OF TERMS AND CONDITIONS |
||||
|
||||
APPENDIX: How to apply the Apache License to your work. |
||||
|
||||
To apply the Apache License to your work, attach the following |
||||
boilerplate notice, with the fields enclosed by brackets "[]" |
||||
replaced with your own identifying information. (Don't include |
||||
the brackets!) The text should be enclosed in the appropriate |
||||
comment syntax for the file format. We also recommend that a |
||||
file or class name and description of purpose be included on the |
||||
same "printed page" as the copyright notice for easier |
||||
identification within third-party archives. |
||||
|
||||
Copyright [yyyy] [name of copyright owner] |
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License"); |
||||
you may not use this file except in compliance with the License. |
||||
You may obtain a copy of the License at |
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0 |
||||
|
||||
Unless required by applicable law or agreed to in writing, software |
||||
distributed under the License is distributed on an "AS IS" BASIS, |
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
See the License for the specific language governing permissions and |
||||
limitations under the License. |
@ -0,0 +1,62 @@ |
||||
# ![bleve](docs/bleve.png) bleve |
||||
|
||||
[![Build Status](https://travis-ci.org/blevesearch/bleve.svg?branch=master)](https://travis-ci.org/blevesearch/bleve) [![Coverage Status](https://coveralls.io/repos/blevesearch/bleve/badge.png?branch=master)](https://coveralls.io/r/blevesearch/bleve?branch=master) [![GoDoc](https://godoc.org/github.com/blevesearch/bleve?status.svg)](https://godoc.org/github.com/blevesearch/bleve) |
||||
[![Join the chat at https://gitter.im/blevesearch/bleve](https://badges.gitter.im/Join%20Chat.svg)](https://gitter.im/blevesearch/bleve?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge) |
||||
[![codebeat](https://codebeat.co/badges/38a7cbc9-9cf5-41c0-a315-0746178230f4)](https://codebeat.co/projects/github-com-blevesearch-bleve) |
||||
[![Go Report Card](https://goreportcard.com/badge/blevesearch/bleve)](https://goreportcard.com/report/blevesearch/bleve) |
||||
|
||||
modern text indexing in go - [blevesearch.com](http://www.blevesearch.com/) |
||||
|
||||
Try out bleve live by [searching the bleve website](http://www.blevesearch.com/search/?q=bleve). |
||||
|
||||
## Features |
||||
|
||||
* Index any go data structure (including JSON) |
||||
* Intelligent defaults backed up by powerful configuration |
||||
* Supported field types: |
||||
* Text, Numeric, Date |
||||
* Supported query types: |
||||
* Term, Phrase, Match, Match Phrase, Prefix |
||||
* Conjunction, Disjunction, Boolean |
||||
* Numeric Range, Date Range |
||||
* Simple query [syntax](http://www.blevesearch.com/docs/Query-String-Query/) for human entry |
||||
* tf-idf Scoring |
||||
* Search result match highlighting |
||||
* Supports Aggregating Facets: |
||||
* Terms Facet |
||||
* Numeric Range Facet |
||||
* Date Range Facet |
||||
|
||||
## Discussion |
||||
|
||||
Discuss usage and development of bleve in the [google group](https://groups.google.com/forum/#!forum/bleve). |
||||
|
||||
## Indexing |
||||
|
||||
message := struct{ |
||||
Id string |
||||
From string |
||||
Body string |
||||
}{ |
||||
Id: "example", |
||||
From: "marty.schoch@gmail.com", |
||||
Body: "bleve indexing is easy", |
||||
} |
||||
|
||||
mapping := bleve.NewIndexMapping() |
||||
index, err := bleve.New("example.bleve", mapping) |
||||
if err != nil { |
||||
panic(err) |
||||
} |
||||
index.Index(message.Id, message) |
||||
|
||||
## Querying |
||||
|
||||
index, _ := bleve.Open("example.bleve") |
||||
query := bleve.NewQueryStringQuery("bleve") |
||||
searchRequest := bleve.NewSearchRequest(query) |
||||
searchResult, _ := index.Search(searchRequest) |
||||
|
||||
## License |
||||
|
||||
Apache License Version 2.0 |
@ -0,0 +1,46 @@ |
||||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package simple |
||||
|
||||
import ( |
||||
"github.com/blevesearch/bleve/analysis" |
||||
"github.com/blevesearch/bleve/analysis/token/lowercase" |
||||
"github.com/blevesearch/bleve/analysis/tokenizer/letter" |
||||
"github.com/blevesearch/bleve/registry" |
||||
) |
||||
|
||||
// Name is the registry name of the simple analyzer: letter tokenization
// plus lower-casing, with no stop-word removal.
const Name = "simple"

// AnalyzerConstructor assembles the simple analyzer from the tokenizer and
// token filter registered in cache. config is unused.
func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (*analysis.Analyzer, error) {
	tokenizer, err := cache.TokenizerNamed(letter.Name)
	if err != nil {
		return nil, err
	}
	toLowerFilter, err := cache.TokenFilterNamed(lowercase.Name)
	if err != nil {
		return nil, err
	}
	rv := analysis.Analyzer{
		Tokenizer: tokenizer,
		TokenFilters: []analysis.TokenFilter{
			toLowerFilter,
		},
	}
	return &rv, nil
}

func init() {
	// Make the analyzer available under Name in the global bleve registry.
	registry.RegisterAnalyzer(Name, AnalyzerConstructor)
}
@ -0,0 +1,52 @@ |
||||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package standard |
||||
|
||||
import ( |
||||
"github.com/blevesearch/bleve/analysis" |
||||
"github.com/blevesearch/bleve/analysis/lang/en" |
||||
"github.com/blevesearch/bleve/analysis/token/lowercase" |
||||
"github.com/blevesearch/bleve/analysis/tokenizer/unicode" |
||||
"github.com/blevesearch/bleve/registry" |
||||
) |
||||
|
||||
// Name is the registry name of the standard analyzer: unicode tokenization,
// lower-casing, then English stop-word removal.
const Name = "standard"

// AnalyzerConstructor assembles the standard analyzer from the tokenizer and
// token filters registered in cache. config is unused.
func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (*analysis.Analyzer, error) {
	tokenizer, err := cache.TokenizerNamed(unicode.Name)
	if err != nil {
		return nil, err
	}
	toLowerFilter, err := cache.TokenFilterNamed(lowercase.Name)
	if err != nil {
		return nil, err
	}
	stopEnFilter, err := cache.TokenFilterNamed(en.StopName)
	if err != nil {
		return nil, err
	}
	// Filters run in slice order: lower-case first, then drop stop words.
	rv := analysis.Analyzer{
		Tokenizer: tokenizer,
		TokenFilters: []analysis.TokenFilter{
			toLowerFilter,
			stopEnFilter,
		},
	}
	return &rv, nil
}

func init() {
	// Make the analyzer available under Name in the global bleve registry.
	registry.RegisterAnalyzer(Name, AnalyzerConstructor)
}
@ -0,0 +1,64 @@ |
||||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package flexible |
||||
|
||||
import ( |
||||
"fmt" |
||||
"time" |
||||
|
||||
"github.com/blevesearch/bleve/analysis" |
||||
"github.com/blevesearch/bleve/registry" |
||||
) |
||||
|
||||
// Name is the registry name of this date/time parser.
const Name = "flexiblego"

// DateTimeParser parses date/time strings by trying a caller-supplied list
// of Go time layouts in order.
type DateTimeParser struct {
	layouts []string
}

// New returns a DateTimeParser that tries the given layouts in order.
func New(layouts []string) *DateTimeParser {
	return &DateTimeParser{
		layouts: layouts,
	}
}

// ParseDateTime returns the result of the first layout that parses input
// successfully, or analysis.ErrInvalidDateTime if none match.
func (p *DateTimeParser) ParseDateTime(input string) (time.Time, error) {
	for _, layout := range p.layouts {
		rv, err := time.Parse(layout, input)
		if err == nil {
			return rv, nil
		}
	}
	return time.Time{}, analysis.ErrInvalidDateTime
}

// DateTimeParserConstructor builds a DateTimeParser from config["layouts"],
// which must be a []interface{} of layout strings. Non-string entries are
// silently skipped; a missing/mistyped "layouts" key is an error.
func DateTimeParserConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.DateTimeParser, error) {
	layouts, ok := config["layouts"].([]interface{})
	if !ok {
		return nil, fmt.Errorf("must specify layouts")
	}
	var layoutStrs []string
	for _, layout := range layouts {
		layoutStr, ok := layout.(string)
		if ok {
			layoutStrs = append(layoutStrs, layoutStr)
		}
	}
	return New(layoutStrs), nil
}

func init() {
	// Register under Name in the global date/time parser registry.
	registry.RegisterDateTimeParser(Name, DateTimeParserConstructor)
}
@ -0,0 +1,45 @@ |
||||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package optional |
||||
|
||||
import ( |
||||
"time" |
||||
|
||||
"github.com/blevesearch/bleve/analysis" |
||||
"github.com/blevesearch/bleve/analysis/datetime/flexible" |
||||
"github.com/blevesearch/bleve/registry" |
||||
) |
||||
|
||||
// Name is the registry name of this date/time parser.
const Name = "dateTimeOptional"

// RFC 3339 variants with progressively optional parts: no timezone,
// space instead of "T", and date-only.
const rfc3339NoTimezone = "2006-01-02T15:04:05"
const rfc3339NoTimezoneNoT = "2006-01-02 15:04:05"
const rfc3339NoTime = "2006-01-02"

// layouts is ordered most-specific first; the flexible parser tries them
// in order and returns the first successful parse.
var layouts = []string{
	time.RFC3339Nano,
	time.RFC3339,
	rfc3339NoTimezone,
	rfc3339NoTimezoneNoT,
	rfc3339NoTime,
}

// DateTimeParserConstructor returns a parser over the fixed layouts above.
// config and cache are unused.
func DateTimeParserConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.DateTimeParser, error) {
	return flexible.New(layouts), nil
}

func init() {
	// Register under Name in the global date/time parser registry.
	registry.RegisterDateTimeParser(Name, DateTimeParserConstructor)
}
@ -0,0 +1,111 @@ |
||||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package analysis |
||||
|
||||
// TokenLocation represents one occurrence of a term at a particular location in
// a field. Start, End and Position have the same meaning as in analysis.Token.
// Field and ArrayPositions identify the field value in the source document.
// See document.Field for details.
type TokenLocation struct {
	Field          string
	ArrayPositions []uint64
	Start          int
	End            int
	Position       int
}
||||
|
||||
// TokenFreq represents all the occurrences of a term in all fields of a
|
||||
// document.
|
||||
type TokenFreq struct { |
||||
Term []byte |
||||
Locations []*TokenLocation |
||||
frequency int |
||||
} |
||||
|
||||
func (tf *TokenFreq) Frequency() int { |
||||
return tf.frequency |
||||
} |
||||
|
||||
// TokenFrequencies maps document terms to their combined frequencies from all
|
||||
// fields.
|
||||
type TokenFrequencies map[string]*TokenFreq |
||||
|
||||
func (tfs TokenFrequencies) MergeAll(remoteField string, other TokenFrequencies) { |
||||
// walk the new token frequencies
|
||||
for tfk, tf := range other { |
||||
// set the remoteField value in incoming token freqs
|
||||
for _, l := range tf.Locations { |
||||
l.Field = remoteField |
||||
} |
||||
existingTf, exists := tfs[tfk] |
||||
if exists { |
||||
existingTf.Locations = append(existingTf.Locations, tf.Locations...) |
||||
existingTf.frequency = existingTf.frequency + tf.frequency |
||||
} else { |
||||
tfs[tfk] = &TokenFreq{ |
||||
Term: tf.Term, |
||||
frequency: tf.frequency, |
||||
Locations: make([]*TokenLocation, len(tf.Locations)), |
||||
} |
||||
copy(tfs[tfk].Locations, tf.Locations) |
||||
} |
||||
} |
||||
} |
||||
|
||||
func TokenFrequency(tokens TokenStream, arrayPositions []uint64, includeTermVectors bool) TokenFrequencies { |
||||
rv := make(map[string]*TokenFreq, len(tokens)) |
||||
|
||||
if includeTermVectors { |
||||
tls := make([]TokenLocation, len(tokens)) |
||||
tlNext := 0 |
||||
|
||||
for _, token := range tokens { |
||||
tls[tlNext] = TokenLocation{ |
||||
ArrayPositions: arrayPositions, |
||||
Start: token.Start, |
||||
End: token.End, |
||||
Position: token.Position, |
||||
} |
||||
|
||||
curr, ok := rv[string(token.Term)] |
||||
if ok { |
||||
curr.Locations = append(curr.Locations, &tls[tlNext]) |
||||
curr.frequency++ |
||||
} else { |
||||
rv[string(token.Term)] = &TokenFreq{ |
||||
Term: token.Term, |
||||
Locations: []*TokenLocation{&tls[tlNext]}, |
||||
frequency: 1, |
||||
} |
||||
} |
||||
|
||||
tlNext++ |
||||
} |
||||
} else { |
||||
for _, token := range tokens { |
||||
curr, exists := rv[string(token.Term)] |
||||
if exists { |
||||
curr.frequency++ |
||||
} else { |
||||
rv[string(token.Term)] = &TokenFreq{ |
||||
Term: token.Term, |
||||
frequency: 1, |
||||
} |
||||
} |
||||
} |
||||
} |
||||
|
||||
return rv |
||||
} |
@ -0,0 +1,70 @@ |
||||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
// Package en implements an analyzer with reasonable defaults for processing
|
||||
// English text.
|
||||
//
|
||||
// It strips possessive suffixes ('s), transforms tokens to lower case,
|
||||
// removes stopwords from a built-in list, and applies porter stemming.
|
||||
//
|
||||
// The built-in stopwords list is defined in EnglishStopWords.
|
||||
package en |
||||
|
||||
import ( |
||||
"github.com/blevesearch/bleve/analysis" |
||||
"github.com/blevesearch/bleve/registry" |
||||
|
||||
"github.com/blevesearch/bleve/analysis/token/lowercase" |
||||
"github.com/blevesearch/bleve/analysis/token/porter" |
||||
"github.com/blevesearch/bleve/analysis/tokenizer/unicode" |
||||
) |
||||
|
||||
const AnalyzerName = "en" |
||||
|
||||
func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (*analysis.Analyzer, error) { |
||||
tokenizer, err := cache.TokenizerNamed(unicode.Name) |
||||
if err != nil { |
||||
return nil, err |
||||
} |
||||
possEnFilter, err := cache.TokenFilterNamed(PossessiveName) |
||||
if err != nil { |
||||
return nil, err |
||||
} |
||||
toLowerFilter, err := cache.TokenFilterNamed(lowercase.Name) |
||||
if err != nil { |
||||
return nil, err |
||||
} |
||||
stopEnFilter, err := cache.TokenFilterNamed(StopName) |
||||
if err != nil { |
||||
return nil, err |
||||
} |
||||
stemmerEnFilter, err := cache.TokenFilterNamed(porter.Name) |
||||
if err != nil { |
||||
return nil, err |
||||
} |
||||
rv := analysis.Analyzer{ |
||||
Tokenizer: tokenizer, |
||||
TokenFilters: []analysis.TokenFilter{ |
||||
possEnFilter, |
||||
toLowerFilter, |
||||
stopEnFilter, |
||||
stemmerEnFilter, |
||||
}, |
||||
} |
||||
return &rv, nil |
||||
} |
||||
|
||||
func init() { |
||||
registry.RegisterAnalyzer(AnalyzerName, AnalyzerConstructor) |
||||
} |
@ -0,0 +1,67 @@ |
||||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package en |
||||
|
||||
import ( |
||||
"unicode/utf8" |
||||
|
||||
"github.com/blevesearch/bleve/analysis" |
||||
"github.com/blevesearch/bleve/registry" |
||||
) |
||||
|
||||
// PossessiveName is the name PossessiveFilter is registered as
// in the bleve registry.
const PossessiveName = "possessive_en"

// The three apostrophe-like runes recognized immediately before a
// trailing s/S by the possessive filter.
const rightSingleQuotationMark = '’' // U+2019
const apostrophe = '\''              // U+0027
// U+FF07 FULLWIDTH APOSTROPHE. The previous literal was a mojibake
// multi-character sequence that could not compile; the escaped form
// is encoding-proof.
const fullWidthApostrophe = '\uFF07'

const apostropheChars = rightSingleQuotationMark + apostrophe + fullWidthApostrophe
||||
|
||||
// PossessiveFilter is a TokenFilter that strips the English
// possessive suffix ('s) from tokens. It recognizes several
// apostrophe variants, is case-insensitive, and does not distinguish
// possessives from contractions ("She's So Rad" becomes "She So Rad").
type PossessiveFilter struct{}

// NewPossessiveFilter returns a stateless PossessiveFilter.
func NewPossessiveFilter() *PossessiveFilter {
	return &PossessiveFilter{}
}
||||
|
||||
func (s *PossessiveFilter) Filter(input analysis.TokenStream) analysis.TokenStream { |
||||
for _, token := range input { |
||||
lastRune, lastRuneSize := utf8.DecodeLastRune(token.Term) |
||||
if lastRune == 's' || lastRune == 'S' { |
||||
nextLastRune, nextLastRuneSize := utf8.DecodeLastRune(token.Term[:len(token.Term)-lastRuneSize]) |
||||
if nextLastRune == rightSingleQuotationMark || |
||||
nextLastRune == apostrophe || |
||||
nextLastRune == fullWidthApostrophe { |
||||
token.Term = token.Term[:len(token.Term)-lastRuneSize-nextLastRuneSize] |
||||
} |
||||
} |
||||
} |
||||
return input |
||||
} |
||||
|
||||
func PossessiveFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) { |
||||
return NewPossessiveFilter(), nil |
||||
} |
||||
|
||||
func init() { |
||||
registry.RegisterTokenFilter(PossessiveName, PossessiveFilterConstructor) |
||||
} |
@ -0,0 +1,33 @@ |
||||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package en |
||||
|
||||
import ( |
||||
"github.com/blevesearch/bleve/analysis" |
||||
"github.com/blevesearch/bleve/analysis/token/stop" |
||||
"github.com/blevesearch/bleve/registry" |
||||
) |
||||
|
||||
func StopTokenFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) { |
||||
tokenMap, err := cache.TokenMapNamed(StopName) |
||||
if err != nil { |
||||
return nil, err |
||||
} |
||||
return stop.NewStopTokensFilter(tokenMap), nil |
||||
} |
||||
|
||||
func init() { |
||||
registry.RegisterTokenFilter(StopName, StopTokenFilterConstructor) |
||||
} |
@ -0,0 +1,344 @@ |
||||
package en |
||||
|
||||
import ( |
||||
"github.com/blevesearch/bleve/analysis" |
||||
"github.com/blevesearch/bleve/registry" |
||||
) |
||||
|
||||
// StopName is the shared registry name for the English stop-word
// token map and the stop filter built from it.
const StopName = "stop_en"
|
||||
// EnglishStopWords is the built-in list of stopwords used by the "stop_en" TokenFilter.
|
||||
//
|
||||
// this content was obtained from:
|
||||
// lucene-4.7.2/analysis/common/src/resources/org/apache/lucene/analysis/snowball/
|
||||
// ` was changed to ' to allow for literal string
|
||||
var EnglishStopWords = []byte(` | From svn.tartarus.org/snowball/trunk/website/algorithms/english/stop.txt |
||||
| This file is distributed under the BSD License. |
||||
| See http://snowball.tartarus.org/license.php
|
||||
| Also see http://www.opensource.org/licenses/bsd-license.html
|
||||
| - Encoding was converted to UTF-8. |
||||
| - This notice was added. |
||||
| |
||||
| NOTE: To use this file with StopFilterFactory, you must specify format="snowball" |
||||
|
||||
| An English stop word list. Comments begin with vertical bar. Each stop |
||||
| word is at the start of a line. |
||||
|
||||
| Many of the forms below are quite rare (e.g. "yourselves") but included for |
||||
| completeness. |
||||
|
||||
| PRONOUNS FORMS |
||||
| 1st person sing |
||||
|
||||
i | subject, always in upper case of course |
||||
|
||||
me | object |
||||
my | possessive adjective |
||||
| the possessive pronoun 'mine' is best suppressed, because of the |
||||
| sense of coal-mine etc. |
||||
myself | reflexive |
||||
| 1st person plural |
||||
we | subject |
||||
|
||||
| us | object |
||||
| care is required here because US = United States. It is usually |
||||
| safe to remove it if it is in lower case. |
||||
our | possessive adjective |
||||
ours | possessive pronoun |
||||
ourselves | reflexive |
||||
| second person (archaic 'thou' forms not included) |
||||
you | subject and object |
||||
your | possessive adjective |
||||
yours | possessive pronoun |
||||
yourself | reflexive (singular) |
||||
yourselves | reflexive (plural) |
||||
| third person singular |
||||
he | subject |
||||
him | object |
||||
his | possessive adjective and pronoun |
||||
himself | reflexive |
||||
|
||||
she | subject |
||||
her | object and possessive adjective |
||||
hers | possessive pronoun |
||||
herself | reflexive |
||||
|
||||
it | subject and object |
||||
its | possessive adjective |
||||
itself | reflexive |
||||
| third person plural |
||||
they | subject |
||||
them | object |
||||
their | possessive adjective |
||||
theirs | possessive pronoun |
||||
themselves | reflexive |
||||
| other forms (demonstratives, interrogatives) |
||||
what |
||||
which |
||||
who |
||||
whom |
||||
this |
||||
that |
||||
these |
||||
those |
||||
|
||||
| VERB FORMS (using F.R. Palmer's nomenclature) |
||||
| BE |
||||
am | 1st person, present |
||||
is | -s form (3rd person, present) |
||||
are | present |
||||
was | 1st person, past |
||||
were | past |
||||
be | infinitive |
||||
been | past participle |
||||
being | -ing form |
||||
| HAVE |
||||
have | simple |
||||
has | -s form |
||||
had | past |
||||
having | -ing form |
||||
| DO |
||||
do | simple |
||||
does | -s form |
||||
did | past |
||||
doing | -ing form |
||||
|
||||
| The forms below are, I believe, best omitted, because of the significant |
||||
| homonym forms: |
||||
|
||||
| He made a WILL |
||||
| old tin CAN |
||||
| merry month of MAY |
||||
| a smell of MUST |
||||
| fight the good fight with all thy MIGHT |
||||
|
||||
| would, could, should, ought might however be included |
||||
|
||||
| | AUXILIARIES |
||||
| | WILL |
||||
|will |
||||
|
||||
would |
||||
|
||||
| | SHALL |
||||
|shall |
||||
|
||||
should |
||||
|
||||
| | CAN |
||||
|can |
||||
|
||||
could |
||||
|
||||
| | MAY |
||||
|may |
||||
|might |
||||
| | MUST |
||||
|must |
||||
| | OUGHT |
||||
|
||||
ought |
||||
|
||||
| COMPOUND FORMS, increasingly encountered nowadays in 'formal' writing |
||||
| pronoun + verb |
||||
|
||||
i'm |
||||
you're |
||||
he's |
||||
she's |
||||
it's |
||||
we're |
||||
they're |
||||
i've |
||||
you've |
||||
we've |
||||
they've |
||||
i'd |
||||
you'd |
||||
he'd |
||||
she'd |
||||
we'd |
||||
they'd |
||||
i'll |
||||
you'll |
||||
he'll |
||||
she'll |
||||
we'll |
||||
they'll |
||||
|
||||
| verb + negation |
||||
|
||||
isn't |
||||
aren't |
||||
wasn't |
||||
weren't |
||||
hasn't |
||||
haven't |
||||
hadn't |
||||
doesn't |
||||
don't |
||||
didn't |
||||
|
||||
| auxiliary + negation |
||||
|
||||
won't |
||||
wouldn't |
||||
shan't |
||||
shouldn't |
||||
can't |
||||
cannot |
||||
couldn't |
||||
mustn't |
||||
|
||||
| miscellaneous forms |
||||
|
||||
let's |
||||
that's |
||||
who's |
||||
what's |
||||
here's |
||||
there's |
||||
when's |
||||
where's |
||||
why's |
||||
how's |
||||
|
||||
| rarer forms |
||||
|
||||
| daren't needn't |
||||
|
||||
| doubtful forms |
||||
|
||||
| oughtn't mightn't |
||||
|
||||
| ARTICLES |
||||
a |
||||
an |
||||
the |
||||
|
||||
| THE REST (Overlap among prepositions, conjunctions, adverbs etc is so |
||||
| high, that classification is pointless.) |
||||
and |
||||
but |
||||
if |
||||
or |
||||
because |
||||
as |
||||
until |
||||
while |
||||
|
||||
of |
||||
at |
||||
by |
||||
for |
||||
with |
||||
about |
||||
against |
||||
between |
||||
into |
||||
through |
||||
during |
||||
before |
||||
after |
||||
above |
||||
below |
||||
to |
||||
from |
||||
up |
||||
down |
||||
in |
||||
out |
||||
on |
||||
off |
||||
over |
||||
under |
||||
|
||||
again |
||||
further |
||||
then |
||||
once |
||||
|
||||
here |
||||
there |
||||
when |
||||
where |
||||
why |
||||
how |
||||
|
||||
all |
||||
any |
||||
both |
||||
each |
||||
few |
||||
more |
||||
most |
||||
other |
||||
some |
||||
such |
||||
|
||||
no |
||||
nor |
||||
not |
||||
only |
||||
own |
||||
same |
||||
so |
||||
than |
||||
too |
||||
very |
||||
|
||||
| Just for the record, the following words are among the commonest in English |
||||
|
||||
| one |
||||
| every |
||||
| least |
||||
| less |
||||
| many |
||||
| now |
||||
| ever |
||||
| never |
||||
| say |
||||
| says |
||||
| said |
||||
| also |
||||
| get |
||||
| go |
||||
| goes |
||||
| just |
||||
| made |
||||
| make |
||||
| put |
||||
| see |
||||
| seen |
||||
| whether |
||||
| like |
||||
| well |
||||
| back |
||||
| even |
||||
| still |
||||
| way |
||||
| take |
||||
| since |
||||
| another |
||||
| however |
||||
| two |
||||
| three |
||||
| four |
||||
| five |
||||
| first |
||||
| second |
||||
| new |
||||
| old |
||||
| high |
||||
| long |
||||
`) |
||||
|
||||
func TokenMapConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenMap, error) { |
||||
rv := analysis.NewTokenMap() |
||||
err := rv.LoadBytes(EnglishStopWords) |
||||
return rv, err |
||||
} |
||||
|
||||
func init() { |
||||
registry.RegisterTokenMap(StopName, TokenMapConstructor) |
||||
} |
@ -0,0 +1,7 @@ |
||||
# full line comment |
||||
marty |
||||
steve # trailing comment |
||||
| different format of comment |
||||
dustin |
||||
siri | different style trailing comment |
||||
multiple words with different whitespace |
@ -0,0 +1,105 @@ |
||||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
// Package lowercase implements a TokenFilter which converts
|
||||
// tokens to lower case according to unicode rules.
|
||||
package lowercase |
||||
|
||||
import ( |
||||
"bytes" |
||||
"unicode" |
||||
"unicode/utf8" |
||||
|
||||
"github.com/blevesearch/bleve/analysis" |
||||
"github.com/blevesearch/bleve/registry" |
||||
) |
||||
|
||||
// Name is the name used to register LowerCaseFilter in the bleve
// registry.
const Name = "to_lower"

// LowerCaseFilter is a TokenFilter that lower-cases token terms
// according to unicode rules.
type LowerCaseFilter struct{}

// NewLowerCaseFilter returns a stateless LowerCaseFilter.
func NewLowerCaseFilter() *LowerCaseFilter {
	return &LowerCaseFilter{}
}
||||
|
||||
func (f *LowerCaseFilter) Filter(input analysis.TokenStream) analysis.TokenStream { |
||||
for _, token := range input { |
||||
token.Term = toLowerDeferredCopy(token.Term) |
||||
} |
||||
return input |
||||
} |
||||
|
||||
func LowerCaseFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) { |
||||
return NewLowerCaseFilter(), nil |
||||
} |
||||
|
||||
func init() { |
||||
registry.RegisterTokenFilter(Name, LowerCaseFilterConstructor) |
||||
} |
||||
|
||||
// toLowerDeferredCopy behaves like bytes.ToLower but overwrites the
// input slice in place for as long as each lowered rune has the same
// (or smaller) UTF-8 width as its original. Only when a lowered rune
// encodes wider than its source (e.g. Ⱥ U+023A, 2 bytes -> ⱥ U+2C65,
// 3 bytes) does it allocate, delegating the remainder to
// bytes.ToLower.
func toLowerDeferredCopy(s []byte) []byte {
	dst := 0
	for src := 0; src < len(s); {
		width := 1
		r := rune(s[src])
		if r >= utf8.RuneSelf {
			r, width = utf8.DecodeRune(s[src:])
		}

		lower := unicode.ToLower(r)
		if lower == r {
			// Already lower case: advance both cursors unchanged.
			src += width
			dst += width
			continue
		}

		// Greek capital sigma at the very end of the slice lowers to
		// the final form ς rather than σ.
		if lower == 'σ' && src+2 == len(s) {
			lower = 'ς'
		}

		lowerWidth := utf8.RuneLen(lower)
		if lowerWidth > width {
			// The replacement is wider than the original, so in-place
			// rewriting is impossible; let bytes.ToLower finish the
			// remainder into a freshly allocated buffer.
			rest := bytes.ToLower(s[src:])
			out := make([]byte, dst+len(rest))
			copy(out[:dst], s[:dst])
			copy(out[dst:], rest)
			return out
		}
		utf8.EncodeRune(s[dst:], lower)
		src += width
		dst += lowerWidth
	}
	return s[:dst]
}
@ -0,0 +1,53 @@ |
||||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package porter |
||||
|
||||
import ( |
||||
"bytes" |
||||
|
||||
"github.com/blevesearch/bleve/analysis" |
||||
"github.com/blevesearch/bleve/registry" |
||||
|
||||
"github.com/blevesearch/go-porterstemmer" |
||||
) |
||||
|
||||
// Name is the registry name of the porter stemming filter.
const Name = "stemmer_porter"

// PorterStemmer is a TokenFilter that applies porter stemming to
// every non-keyword token.
type PorterStemmer struct{}

// NewPorterStemmer returns a stateless PorterStemmer.
func NewPorterStemmer() *PorterStemmer {
	return &PorterStemmer{}
}
|
||||
func (s *PorterStemmer) Filter(input analysis.TokenStream) analysis.TokenStream { |
||||
for _, token := range input { |
||||
// if it is not a protected keyword, stem it
|
||||
if !token.KeyWord { |
||||
termRunes := bytes.Runes(token.Term) |
||||
stemmedRunes := porterstemmer.StemWithoutLowerCasing(termRunes) |
||||
token.Term = analysis.BuildTermFromRunes(stemmedRunes) |
||||
} |
||||
} |
||||
return input |
||||
} |
||||
|
||||
func PorterStemmerConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) { |
||||
return NewPorterStemmer(), nil |
||||
} |
||||
|
||||
func init() { |
||||
registry.RegisterTokenFilter(Name, PorterStemmerConstructor) |
||||
} |
@ -0,0 +1,70 @@ |
||||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
// Package stop implements a TokenFilter removing tokens found in
|
||||
// a TokenMap.
|
||||
//
|
||||
// It constructor takes the following arguments:
|
||||
//
|
||||
// "stop_token_map" (string): the name of the token map identifying tokens to
|
||||
// remove.
|
||||
package stop |
||||
|
||||
import ( |
||||
"fmt" |
||||
|
||||
"github.com/blevesearch/bleve/analysis" |
||||
"github.com/blevesearch/bleve/registry" |
||||
) |
||||
|
||||
const Name = "stop_tokens" |
||||
|
||||
type StopTokensFilter struct { |
||||
stopTokens analysis.TokenMap |
||||
} |
||||
|
||||
func NewStopTokensFilter(stopTokens analysis.TokenMap) *StopTokensFilter { |
||||
return &StopTokensFilter{ |
||||
stopTokens: stopTokens, |
||||
} |
||||
} |
||||
|
||||
func (f *StopTokensFilter) Filter(input analysis.TokenStream) analysis.TokenStream { |
||||
j := 0 |
||||
for _, token := range input { |
||||
_, isStopToken := f.stopTokens[string(token.Term)] |
||||
if !isStopToken { |
||||
input[j] = token |
||||
j++ |
||||
} |
||||
} |
||||
|
||||
return input[:j] |
||||
} |
||||
|
||||
func StopTokensFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) { |
||||
stopTokenMapName, ok := config["stop_token_map"].(string) |
||||
if !ok { |
||||
return nil, fmt.Errorf("must specify stop_token_map") |
||||
} |
||||
stopTokenMap, err := cache.TokenMapNamed(stopTokenMapName) |
||||
if err != nil { |
||||
return nil, fmt.Errorf("error building stop words filter: %v", err) |
||||
} |
||||
return NewStopTokensFilter(stopTokenMap), nil |
||||
} |
||||
|
||||
func init() { |
||||
registry.RegisterTokenFilter(Name, StopTokensFilterConstructor) |
||||
} |
@ -0,0 +1,76 @@ |
||||
// Copyright (c) 2016 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package character |
||||
|
||||
import ( |
||||
"unicode/utf8" |
||||
|
||||
"github.com/blevesearch/bleve/analysis" |
||||
) |
||||
|
||||
type IsTokenRune func(r rune) bool |
||||
|
||||
type CharacterTokenizer struct { |
||||
isTokenRun IsTokenRune |
||||
} |
||||
|
||||
func NewCharacterTokenizer(f IsTokenRune) *CharacterTokenizer { |
||||
return &CharacterTokenizer{ |
||||
isTokenRun: f, |
||||
} |
||||
} |
||||
|
||||
func (c *CharacterTokenizer) Tokenize(input []byte) analysis.TokenStream { |
||||
|
||||
rv := make(analysis.TokenStream, 0, 1024) |
||||
|
||||
offset := 0 |
||||
start := 0 |
||||
end := 0 |
||||
count := 0 |
||||
for currRune, size := utf8.DecodeRune(input[offset:]); currRune != utf8.RuneError; currRune, size = utf8.DecodeRune(input[offset:]) { |
||||
isToken := c.isTokenRun(currRune) |
||||
if isToken { |
||||
end = offset + size |
||||
} else { |
||||
if end-start > 0 { |
||||
// build token
|
||||
rv = append(rv, &analysis.Token{ |
||||
Term: input[start:end], |
||||
Start: start, |
||||
End: end, |
||||
Position: count + 1, |
||||
Type: analysis.AlphaNumeric, |
||||
}) |
||||
count++ |
||||
} |
||||
start = offset + size |
||||
end = start |
||||
} |
||||
offset += size |
||||
} |
||||
// if we ended in the middle of a token, finish it
|
||||
if end-start > 0 { |
||||
// build token
|
||||
rv = append(rv, &analysis.Token{ |
||||
Term: input[start:end], |
||||
Start: start, |
||||
End: end, |
||||
Position: count + 1, |
||||
Type: analysis.AlphaNumeric, |
||||
}) |
||||
} |
||||
return rv |
||||
} |
@ -0,0 +1,33 @@ |
||||
// Copyright (c) 2016 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package letter |
||||
|
||||
import ( |
||||
"unicode" |
||||
|
||||
"github.com/blevesearch/bleve/analysis" |
||||
"github.com/blevesearch/bleve/analysis/tokenizer/character" |
||||
"github.com/blevesearch/bleve/registry" |
||||
) |
||||
|
||||
const Name = "letter" |
||||
|
||||
func TokenizerConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.Tokenizer, error) { |
||||
return character.NewCharacterTokenizer(unicode.IsLetter), nil |
||||
} |
||||
|
||||
func init() { |
||||
registry.RegisterTokenizer(Name, TokenizerConstructor) |
||||
} |
@ -0,0 +1,131 @@ |
||||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package unicode |
||||
|
||||
import ( |
||||
"github.com/blevesearch/segment" |
||||
|
||||
"github.com/blevesearch/bleve/analysis" |
||||
"github.com/blevesearch/bleve/registry" |
||||
) |
||||
|
||||
// Name is the registry name of the unicode word tokenizer.
const Name = "unicode"

// UnicodeTokenizer segments input into word tokens using the
// blevesearch/segment unicode word segmenter.
type UnicodeTokenizer struct{}

// NewUnicodeTokenizer returns a stateless UnicodeTokenizer.
func NewUnicodeTokenizer() *UnicodeTokenizer {
	return &UnicodeTokenizer{}
}
||||
|
||||
func (rt *UnicodeTokenizer) Tokenize(input []byte) analysis.TokenStream { |
||||
rvx := make([]analysis.TokenStream, 0, 10) // When rv gets full, append to rvx.
|
||||
rv := make(analysis.TokenStream, 0, 1) |
||||
|
||||
ta := []analysis.Token(nil) |
||||
taNext := 0 |
||||
|
||||
segmenter := segment.NewWordSegmenterDirect(input) |
||||
start := 0 |
||||
pos := 1 |
||||
|
||||
guessRemaining := func(end int) int { |
||||
avgSegmentLen := end / (len(rv) + 1) |
||||
if avgSegmentLen < 1 { |
||||
avgSegmentLen = 1 |
||||
} |
||||
|
||||
remainingLen := len(input) - end |
||||
|
||||
return remainingLen / avgSegmentLen |
||||
} |
||||
|
||||
for segmenter.Segment() { |
||||
segmentBytes := segmenter.Bytes() |
||||
end := start + len(segmentBytes) |
||||
if segmenter.Type() != segment.None { |
||||
if taNext >= len(ta) { |
||||
remainingSegments := guessRemaining(end) |
||||
if remainingSegments > 1000 { |
||||
remainingSegments = 1000 |
||||
} |
||||
if remainingSegments < 1 { |
||||
remainingSegments = 1 |
||||
} |
||||
|
||||
ta = make([]analysis.Token, remainingSegments) |
||||
taNext = 0 |
||||
} |
||||
|
||||
token := &ta[taNext] |
||||
taNext++ |
||||
|
||||
token.Term = segmentBytes |
||||
token.Start = start |
||||
token.End = end |
||||
token.Position = pos |
||||
token.Type = convertType(segmenter.Type()) |
||||
|
||||
if len(rv) >= cap(rv) { // When rv is full, save it into rvx.
|
||||
rvx = append(rvx, rv) |
||||
|
||||
rvCap := cap(rv) * 2 |
||||
if rvCap > 256 { |
||||
rvCap = 256 |
||||
} |
||||
|
||||
rv = make(analysis.TokenStream, 0, rvCap) // Next rv cap is bigger.
|
||||
} |
||||
|
||||
rv = append(rv, token) |
||||
pos++ |
||||
} |
||||
start = end |
||||
} |
||||
|
||||
if len(rvx) > 0 { |
||||
n := len(rv) |
||||
for _, r := range rvx { |
||||
n += len(r) |
||||
} |
||||
rall := make(analysis.TokenStream, 0, n) |
||||
for _, r := range rvx { |
||||
rall = append(rall, r...) |
||||
} |
||||
return append(rall, rv...) |
||||
} |
||||
|
||||
return rv |
||||
} |
||||
|
||||
func UnicodeTokenizerConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.Tokenizer, error) { |
||||
return NewUnicodeTokenizer(), nil |
||||
} |
||||
|
||||
func init() { |
||||
registry.RegisterTokenizer(Name, UnicodeTokenizerConstructor) |
||||
} |
||||
|
||||
func convertType(segmentWordType int) analysis.TokenType { |
||||
switch segmentWordType { |
||||
case segment.Ideo: |
||||
return analysis.Ideographic |
||||
case segment.Kana: |
||||
return analysis.Ideographic |
||||
case segment.Number: |
||||
return analysis.Numeric |
||||
} |
||||
return analysis.AlphaNumeric |
||||
} |
@ -0,0 +1,76 @@ |
||||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package analysis |
||||
|
||||
import ( |
||||
"bufio" |
||||
"bytes" |
||||
"io" |
||||
"io/ioutil" |
||||
"strings" |
||||
) |
||||
|
||||
// TokenMap is a set of tokens (e.g. stop words or keywords) keyed by the
// token text. Entries are always stored with a true value.
type TokenMap map[string]bool

// NewTokenMap returns an empty TokenMap ready for use.
func NewTokenMap() TokenMap {
	// A zero size hint is meaningless for make on a map; drop it.
	return make(TokenMap)
}

// LoadFile reads in a list of tokens from a text file, one per line.
// Comments are supported using `#` or `|`.
// The whole file is read into memory before parsing.
func (t TokenMap) LoadFile(filename string) error {
	data, err := ioutil.ReadFile(filename)
	if err != nil {
		return err
	}
	return t.LoadBytes(data)
}

// LoadBytes reads in a list of tokens from memory, one per line.
// Comments are supported using `#` or `|`.
func (t TokenMap) LoadBytes(data []byte) error {
	bytesReader := bytes.NewReader(data)
	bufioReader := bufio.NewReader(bytesReader)
	line, err := bufioReader.ReadString('\n')
	for err == nil {
		t.LoadLine(line)
		line, err = bufioReader.ReadString('\n')
	}
	// ReadString returns io.EOF together with any final unterminated line,
	// so that last value still has to be processed.
	if err == io.EOF {
		t.LoadLine(line)
		return nil
	}
	return err
}

// LoadLine parses a single line, stripping everything after a `#` or `|`
// comment marker and adding each remaining whitespace-separated token.
func (t TokenMap) LoadLine(line string) {
	// find the start of a comment, if any
	startComment := strings.IndexAny(line, "#|")
	if startComment >= 0 {
		line = line[:startComment]
	}

	tokens := strings.Fields(line)
	for _, token := range tokens {
		t.AddToken(token)
	}
}

// AddToken adds a single token to the set.
func (t TokenMap) AddToken(token string) {
	t[token] = true
}
@ -0,0 +1,103 @@ |
||||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package analysis |
||||
|
||||
import ( |
||||
"fmt" |
||||
"time" |
||||
) |
||||
|
||||
// CharFilter transforms raw input bytes before tokenization
// (for example stripping markup or normalizing characters).
type CharFilter interface {
	Filter([]byte) []byte
}

// TokenType classifies a token produced by a tokenizer or token filter.
type TokenType int

const (
	AlphaNumeric TokenType = iota
	Ideographic
	Numeric
	DateTime
	Shingle
	Single
	Double
	Boolean
)

// Token represents one occurrence of a term at a particular location in a
// field.
type Token struct {
	// Start specifies the byte offset of the beginning of the term in the
	// field.
	Start int `json:"start"`

	// End specifies the byte offset of the end of the term in the field.
	End  int    `json:"end"`
	Term []byte `json:"term"`

	// Position specifies the 1-based index of the token in the sequence of
	// occurrences of its term in the field.
	Position int       `json:"position"`
	Type     TokenType `json:"type"`
	KeyWord  bool      `json:"keyword"`
}

// String renders the token for debugging; the term bytes are printed as text.
func (t *Token) String() string {
	return fmt.Sprintf("Start: %d End: %d Position: %d Token: %s Type: %d", t.Start, t.End, t.Position, string(t.Term), t.Type)
}

// TokenStream is an ordered sequence of tokens.
type TokenStream []*Token

// A Tokenizer splits an input string into tokens, the usual behaviour being to
// map words to tokens.
type Tokenizer interface {
	Tokenize([]byte) TokenStream
}

// A TokenFilter adds, transforms or removes tokens from a token stream.
type TokenFilter interface {
	Filter(TokenStream) TokenStream
}

// Analyzer chains character filters, a tokenizer, and token filters into a
// complete text-analysis pipeline; the filter slices run in order.
type Analyzer struct {
	CharFilters  []CharFilter
	Tokenizer    Tokenizer
	TokenFilters []TokenFilter
}
||||
|
||||
func (a *Analyzer) Analyze(input []byte) TokenStream { |
||||
if a.CharFilters != nil { |
||||
for _, cf := range a.CharFilters { |
||||
input = cf.Filter(input) |
||||
} |
||||
} |
||||
tokens := a.Tokenizer.Tokenize(input) |
||||
if a.TokenFilters != nil { |
||||
for _, tf := range a.TokenFilters { |
||||
tokens = tf.Filter(tokens) |
||||
} |
||||
} |
||||
return tokens |
||||
} |
||||
|
||||
// ErrInvalidDateTime is returned when none of a parser's configured layouts
// match the input text.
var ErrInvalidDateTime = fmt.Errorf("unable to parse datetime with any of the layouts")

// DateTimeParser converts a textual date representation into a time.Time.
type DateTimeParser interface {
	ParseDateTime(string) (time.Time, error)
}

// ByteArrayConverter interprets raw stored bytes as a typed value.
type ByteArrayConverter interface {
	Convert([]byte) (interface{}, error)
}
@ -0,0 +1,92 @@ |
||||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package analysis |
||||
|
||||
import ( |
||||
"bytes" |
||||
"unicode/utf8" |
||||
) |
||||
|
||||
// DeleteRune removes the rune at pos, shifting later runes left in place and
// returning a slice one shorter that shares in's backing array. An
// out-of-range pos returns in unchanged.
func DeleteRune(in []rune, pos int) []rune {
	if pos >= len(in) {
		return in
	}
	// append shifts the tail down over the deleted slot within the same
	// backing array, exactly like the copy-and-reslice it replaces.
	return append(in[:pos], in[pos+1:]...)
}
||||
|
||||
// InsertRune returns a new slice with r inserted at pos; in is not modified.
func InsertRune(in []rune, pos int, r rune) []rune {
	// Build the result by appending the three pieces into a fresh slice of
	// exactly the right capacity: prefix, new rune, suffix.
	out := make([]rune, 0, len(in)+1)
	out = append(out, in[:pos]...)
	out = append(out, r)
	out = append(out, in[pos:]...)
	return out
}
||||
|
||||
// BuildTermFromRunesOptimistic will build a term from the provided runes
// AND optimistically attempt to encode into the provided buffer
// if at any point it appears the buffer is too small, a new buffer is
// allocated and that is used instead
// this should be used in cases where frequently the new term is the same
// length or shorter than the original term (in number of bytes)
//
// NOTE: the returned slice may alias buf; callers must not assume the
// result is an independent allocation.
func BuildTermFromRunesOptimistic(buf []byte, runes []rune) []byte {
	rv := buf
	used := 0
	for _, r := range runes {
		nextLen := utf8.RuneLen(r)
		if used+nextLen > len(rv) {
			// alloc new buf
			// len(runes)*utf8.UTFMax is always enough for the whole term,
			// so this reallocation happens at most once.
			buf = make([]byte, len(runes)*utf8.UTFMax)
			// copy work we've already done
			copy(buf, rv[:used])
			rv = buf
		}
		written := utf8.EncodeRune(rv[used:], r)
		used += written
	}
	return rv[:used]
}

// BuildTermFromRunes encodes runes into a freshly allocated buffer sized
// for the worst case (every rune at maximum UTF-8 width).
func BuildTermFromRunes(runes []rune) []byte {
	return BuildTermFromRunesOptimistic(make([]byte, len(runes)*utf8.UTFMax), runes)
}

// TruncateRunes drops the last num runes from input and re-encodes the rest.
// NOTE(review): if num exceeds the rune count the reslice panics — callers
// appear to guarantee num <= len(runes); confirm before relying on it.
func TruncateRunes(input []byte, num int) []byte {
	runes := bytes.Runes(input)
	runes = runes[:len(runes)-num]
	out := BuildTermFromRunes(runes)
	return out
}
||||
|
||||
func RunesEndsWith(input []rune, suffix string) bool { |
||||
inputLen := len(input) |
||||
suffixRunes := []rune(suffix) |
||||
suffixLen := len(suffixRunes) |
||||
if suffixLen > inputLen { |
||||
return false |
||||
} |
||||
|
||||
for i := suffixLen - 1; i >= 0; i-- { |
||||
if input[inputLen-(suffixLen-i)] != suffixRunes[i] { |
||||
return false |
||||
} |
||||
} |
||||
|
||||
return true |
||||
} |
@ -0,0 +1,88 @@ |
||||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package bleve |
||||
|
||||
import ( |
||||
"expvar" |
||||
"io/ioutil" |
||||
"log" |
||||
"time" |
||||
|
||||
"github.com/blevesearch/bleve/index" |
||||
"github.com/blevesearch/bleve/index/store/gtreap" |
||||
"github.com/blevesearch/bleve/index/upsidedown" |
||||
"github.com/blevesearch/bleve/registry" |
||||
"github.com/blevesearch/bleve/search/highlight/highlighter/html" |
||||
) |
||||
|
||||
// bleveExpVar is the root expvar map under which bleve publishes metrics.
var bleveExpVar = expvar.NewMap("bleve")

// configuration holds the library-wide defaults consulted when creating and
// opening indexes.
type configuration struct {
	Cache                  *registry.Cache
	DefaultHighlighter     string
	DefaultKVStore         string
	DefaultMemKVStore      string
	DefaultIndexType       string
	SlowSearchLogThreshold time.Duration
	analysisQueue          *index.AnalysisQueue
}

// SetAnalysisQueueSize replaces the shared analysis queue with one backed by
// n workers.
func (c *configuration) SetAnalysisQueueSize(n int) {
	c.analysisQueue = index.NewAnalysisQueue(n)
}

// newConfiguration returns a configuration with a fresh registry cache and a
// 4-worker analysis queue.
func newConfiguration() *configuration {
	return &configuration{
		Cache:         registry.NewCache(),
		analysisQueue: index.NewAnalysisQueue(4),
	}
}

// Config contains library level configuration
var Config *configuration

func init() {
	bootStart := time.Now()

	// build the default configuration
	Config = newConfiguration()

	// set the default highlighter
	Config.DefaultHighlighter = html.Name

	// default kv store
	Config.DefaultKVStore = ""

	// default mem only kv store
	Config.DefaultMemKVStore = gtreap.Name

	// default index
	Config.DefaultIndexType = upsidedown.Name

	// publish boot timing and per-index stats via expvar
	bootDuration := time.Since(bootStart)
	bleveExpVar.Add("bootDuration", int64(bootDuration))
	indexStats = NewIndexStats()
	bleveExpVar.Set("indexes", indexStats)

	// initDisk is build-tag dependent: it selects a disk-backed default KV
	// store on platforms that allow one.
	initDisk()
}

var logger = log.New(ioutil.Discard, "bleve", log.LstdFlags)

// SetLog sets the logger used for logging
// by default log messages are sent to ioutil.Discard
func SetLog(l *log.Logger) {
	logger = l
}
@ -0,0 +1,23 @@ |
||||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
// +build appengine appenginevm
|
||||
|
||||
package bleve |
||||
|
||||
// in the appengine environment we cannot support disk based indexes
// so we do no extra configuration in this method
func initDisk() {
	// intentionally empty: Config.DefaultKVStore is left as set by the
	// package init in config.go
}
@ -0,0 +1,25 @@ |
||||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
// +build !appengine,!appenginevm
|
||||
|
||||
package bleve |
||||
|
||||
import "github.com/blevesearch/bleve/index/store/boltdb" |
||||
|
||||
// in normal environments we configure boltdb as the default storage
// (the appengine build of this function is a no-op instead)
func initDisk() {
	// default kv store
	Config.DefaultKVStore = boltdb.Name
}
@ -0,0 +1,38 @@ |
||||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
/*
Package bleve is a library for indexing and searching text.

Example Opening New Index, Indexing Data

	message := struct {
		Id   string
		From string
		Body string
	}{
		Id:   "example",
		From: "marty.schoch@gmail.com",
		Body: "bleve indexing is easy",
	}

	mapping := bleve.NewIndexMapping()
	index, _ := bleve.New("example.bleve", mapping)
	index.Index(message.Id, message)

Example Opening Existing Index, Searching Data

	index, _ := bleve.Open("example.bleve")
	query := bleve.NewQueryStringQuery("bleve")
	searchRequest := bleve.NewSearchRequest(query)
	searchResult, _ := index.Search(searchRequest)
*/
package bleve
@ -0,0 +1,75 @@ |
||||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package document |
||||
|
||||
import "fmt" |
||||
|
||||
// Document is the unit of indexing: an externally supplied ID plus the set
// of named fields to index, with composite fields held separately.
type Document struct {
	ID     string  `json:"id"`
	Fields []Field `json:"fields"`
	// CompositeFields aggregate the analysis results of other fields; they
	// are kept out of the plain Fields slice (see AddField).
	CompositeFields []*CompositeField
	// Number is internal bookkeeping and is excluded from JSON.
	Number uint64 `json:"-"`
}

// NewDocument creates an empty document with the given external ID.
func NewDocument(id string) *Document {
	return &Document{
		ID:              id,
		Fields:          make([]Field, 0),
		CompositeFields: make([]*CompositeField, 0),
	}
}

// AddField appends f, routing *CompositeField values into their dedicated
// slice; it returns the document to allow call chaining.
func (d *Document) AddField(f Field) *Document {
	switch f := f.(type) {
	case *CompositeField:
		d.CompositeFields = append(d.CompositeFields, f)
	default:
		d.Fields = append(d.Fields, f)
	}
	return d
}
||||
|
||||
func (d *Document) GoString() string { |
||||
fields := "" |
||||
for i, field := range d.Fields { |
||||
if i != 0 { |
||||
fields += ", " |
||||
} |
||||
fields += fmt.Sprintf("%#v", field) |
||||
} |
||||
compositeFields := "" |
||||
for i, field := range d.CompositeFields { |
||||
if i != 0 { |
||||
compositeFields += ", " |
||||
} |
||||
compositeFields += fmt.Sprintf("%#v", field) |
||||
} |
||||
return fmt.Sprintf("&document.Document{ID:%s, Fields: %s, CompositeFields: %s}", d.ID, fields, compositeFields) |
||||
} |
||||
|
||||
// NumPlainTextBytes sums the plain-text byte counts of all plain fields,
// then counts each plain field again once per composite field that includes
// it.
// NOTE(review): the nested loop is O(len(CompositeFields) * len(Fields));
// acceptable while documents carry few composites.
func (d *Document) NumPlainTextBytes() uint64 {
	rv := uint64(0)
	for _, field := range d.Fields {
		rv += field.NumPlainTextBytes()
	}
	for _, compositeField := range d.CompositeFields {
		for _, field := range d.Fields {
			if compositeField.includesField(field.Name()) {
				rv += field.NumPlainTextBytes()
			}
		}
	}
	return rv
}
@ -0,0 +1,39 @@ |
||||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package document |
||||
|
||||
import ( |
||||
"github.com/blevesearch/bleve/analysis" |
||||
) |
||||
|
||||
// Field is the interface implemented by every indexable field type in this
// package (boolean, numeric, datetime, composite, ...).
type Field interface {
	// Name returns the path of the field from the root DocumentMapping.
	// A root field path is "field", a subdocument field is "parent.field".
	Name() string
	// ArrayPositions returns the intermediate document and field indices
	// required to resolve the field value in the document. For example, if the
	// field path is "doc1.doc2.field" where doc1 and doc2 are slices or
	// arrays, ArrayPositions returns 2 indices used to resolve "doc2" value in
	// "doc1", then "field" in "doc2".
	ArrayPositions() []uint64
	// Options reports how the field is to be stored and indexed.
	Options() IndexingOptions
	// Analyze produces the field length and the token frequencies used to
	// build the index entries for this field.
	Analyze() (int, analysis.TokenFrequencies)
	// Value returns the raw stored bytes of the field.
	Value() []byte

	// NumPlainTextBytes should return the number of plain text bytes
	// that this field represents - this is a common metric for tracking
	// the rate of indexing
	NumPlainTextBytes() uint64
}
@ -0,0 +1,107 @@ |
||||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package document |
||||
|
||||
import ( |
||||
"fmt" |
||||
|
||||
"github.com/blevesearch/bleve/analysis" |
||||
) |
||||
|
||||
// DefaultBooleanIndexingOptions stores and indexes boolean fields by default.
const DefaultBooleanIndexingOptions = StoreField | IndexField

// BooleanField carries a boolean encoded as a single byte:
// 'T' for true, 'F' for false.
type BooleanField struct {
	name              string
	arrayPositions    []uint64
	options           IndexingOptions
	value             []byte
	numPlainTextBytes uint64
}

// Name returns the field's mapping path.
func (b *BooleanField) Name() string {
	return b.name
}

// ArrayPositions returns the indices used to locate this field inside
// nested arrays/slices of the source document.
func (b *BooleanField) ArrayPositions() []uint64 {
	return b.arrayPositions
}

// Options reports how this field is stored and indexed.
func (b *BooleanField) Options() IndexingOptions {
	return b.options
}

// Analyze emits a single token holding the encoded boolean value.
func (b *BooleanField) Analyze() (int, analysis.TokenFrequencies) {
	tokens := make(analysis.TokenStream, 0)
	tokens = append(tokens, &analysis.Token{
		Start:    0,
		End:      len(b.value),
		Term:     b.value,
		Position: 1,
		Type:     analysis.Boolean,
	})

	fieldLength := len(tokens)
	tokenFreqs := analysis.TokenFrequency(tokens, b.arrayPositions, b.options.IncludeTermVectors())
	return fieldLength, tokenFreqs
}

// Value returns the encoded single-byte representation.
func (b *BooleanField) Value() []byte {
	return b.value
}

// Boolean decodes the stored byte back into a bool; any length other than
// one byte is an error.
func (b *BooleanField) Boolean() (bool, error) {
	if len(b.value) == 1 {
		return b.value[0] == 'T', nil
	}
	return false, fmt.Errorf("boolean field has %d bytes", len(b.value))
}

// GoString implements fmt.GoStringer for debugging output.
func (b *BooleanField) GoString() string {
	return fmt.Sprintf("&document.BooleanField{Name:%s, Options: %s, Value: %s}", b.name, b.options, b.value)
}

// NumPlainTextBytes reports the plain-text size metric recorded for this
// field at construction.
func (b *BooleanField) NumPlainTextBytes() uint64 {
	return b.numPlainTextBytes
}
||||
|
||||
func NewBooleanFieldFromBytes(name string, arrayPositions []uint64, value []byte) *BooleanField { |
||||
return &BooleanField{ |
||||
name: name, |
||||
arrayPositions: arrayPositions, |
||||
value: value, |
||||
options: DefaultNumericIndexingOptions, |
||||
numPlainTextBytes: uint64(len(value)), |
||||
} |
||||
} |
||||
|
||||
func NewBooleanField(name string, arrayPositions []uint64, b bool) *BooleanField { |
||||
return NewBooleanFieldWithIndexingOptions(name, arrayPositions, b, DefaultNumericIndexingOptions) |
||||
} |
||||
|
||||
// NewBooleanFieldWithIndexingOptions encodes b as a single byte ('T'/'F')
// and records the plain-text size as the length the value would have had as
// text.
func NewBooleanFieldWithIndexingOptions(name string, arrayPositions []uint64, b bool, options IndexingOptions) *BooleanField {
	// 5 == len("false"), 4 == len("true")
	numPlainTextBytes := 5
	v := []byte("F")
	if b {
		numPlainTextBytes = 4
		v = []byte("T")
	}
	return &BooleanField{
		name:              name,
		arrayPositions:    arrayPositions,
		value:             v,
		options:           options,
		numPlainTextBytes: uint64(numPlainTextBytes),
	}
}
@ -0,0 +1,99 @@ |
||||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package document |
||||
|
||||
import ( |
||||
"github.com/blevesearch/bleve/analysis" |
||||
) |
||||
|
||||
// DefaultCompositeIndexingOptions indexes (but does not store) composite
// fields by default.
const DefaultCompositeIndexingOptions = IndexField

// CompositeField aggregates the analysis results of other fields (e.g. the
// "_all" field). Inclusion is decided per source field by the include and
// exclude sets, falling back to defaultInclude.
type CompositeField struct {
	name                 string
	includedFields       map[string]bool
	excludedFields       map[string]bool
	defaultInclude       bool
	options              IndexingOptions
	totalLength          int
	compositeFrequencies analysis.TokenFrequencies
}

// NewCompositeField creates a composite field with the default indexing
// options.
func NewCompositeField(name string, defaultInclude bool, include []string, exclude []string) *CompositeField {
	return NewCompositeFieldWithIndexingOptions(name, defaultInclude, include, exclude, DefaultCompositeIndexingOptions)
}

// NewCompositeFieldWithIndexingOptions creates a composite field, converting
// the include/exclude slices into set-style maps.
func NewCompositeFieldWithIndexingOptions(name string, defaultInclude bool, include []string, exclude []string, options IndexingOptions) *CompositeField {
	rv := &CompositeField{
		name:                 name,
		options:              options,
		defaultInclude:       defaultInclude,
		includedFields:       make(map[string]bool, len(include)),
		excludedFields:       make(map[string]bool, len(exclude)),
		compositeFrequencies: make(analysis.TokenFrequencies),
	}

	for _, i := range include {
		rv.includedFields[i] = true
	}
	for _, e := range exclude {
		rv.excludedFields[e] = true
	}

	return rv
}

// Name returns the composite field's name.
func (c *CompositeField) Name() string {
	return c.name
}

// ArrayPositions is always empty: a composite field has no position in the
// source document.
func (c *CompositeField) ArrayPositions() []uint64 {
	return []uint64{}
}

// Options reports how this field is indexed.
func (c *CompositeField) Options() IndexingOptions {
	return c.options
}

// Analyze returns the length and frequencies accumulated via Compose.
func (c *CompositeField) Analyze() (int, analysis.TokenFrequencies) {
	return c.totalLength, c.compositeFrequencies
}

// Value is always empty: composite fields carry no stored bytes of their own.
func (c *CompositeField) Value() []byte {
	return []byte{}
}

// NumPlainTextBytes is zero: the underlying fields account for their own
// plain-text bytes.
func (c *CompositeField) NumPlainTextBytes() uint64 {
	return 0
}
||||
|
||||
func (c *CompositeField) includesField(field string) bool { |
||||
shouldInclude := c.defaultInclude |
||||
_, fieldShouldBeIncluded := c.includedFields[field] |
||||
if fieldShouldBeIncluded { |
||||
shouldInclude = true |
||||
} |
||||
_, fieldShouldBeExcluded := c.excludedFields[field] |
||||
if fieldShouldBeExcluded { |
||||
shouldInclude = false |
||||
} |
||||
return shouldInclude |
||||
} |
||||
|
||||
// Compose folds one source field's analysis results into the composite,
// provided the field passes this composite's include/exclude rules.
func (c *CompositeField) Compose(field string, length int, freq analysis.TokenFrequencies) {
	if c.includesField(field) {
		c.totalLength += length
		c.compositeFrequencies.MergeAll(field, freq)
	}
}
@ -0,0 +1,144 @@ |
||||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package document |
||||
|
||||
import ( |
||||
"fmt" |
||||
"math" |
||||
"time" |
||||
|
||||
"github.com/blevesearch/bleve/analysis" |
||||
"github.com/blevesearch/bleve/numeric" |
||||
) |
||||
|
||||
// DefaultDateTimeIndexingOptions stores and indexes datetime fields by
// default.
const DefaultDateTimeIndexingOptions = StoreField | IndexField

// DefaultDateTimePrecisionStep is the shift (in bits) between successive
// reduced-precision terms emitted by Analyze.
const DefaultDateTimePrecisionStep uint = 4

// The representable window is whatever fits in an int64 of nanoseconds
// since the Unix epoch.
var MinTimeRepresentable = time.Unix(0, math.MinInt64)
var MaxTimeRepresentable = time.Unix(0, math.MaxInt64)

// DateTimeField stores a time as a prefix-coded int64 of UnixNano.
type DateTimeField struct {
	name              string
	arrayPositions    []uint64
	options           IndexingOptions
	value             numeric.PrefixCoded
	numPlainTextBytes uint64
}

// Name returns the field's mapping path.
func (n *DateTimeField) Name() string {
	return n.name
}

// ArrayPositions returns the indices used to locate this field inside
// nested arrays/slices of the source document.
func (n *DateTimeField) ArrayPositions() []uint64 {
	return n.arrayPositions
}

// Options reports how this field is stored and indexed.
func (n *DateTimeField) Options() IndexingOptions {
	return n.options
}

// Analyze emits the full-precision term plus progressively coarser
// prefix-coded terms (one per DefaultDateTimePrecisionStep bits of shift),
// which is what enables efficient numeric range queries over dates.
func (n *DateTimeField) Analyze() (int, analysis.TokenFrequencies) {
	tokens := make(analysis.TokenStream, 0)
	tokens = append(tokens, &analysis.Token{
		Start:    0,
		End:      len(n.value),
		Term:     n.value,
		Position: 1,
		Type:     analysis.DateTime,
	})

	original, err := n.value.Int64()
	if err == nil {
		// If the value does not decode, only the full-precision term above
		// is indexed.
		shift := DefaultDateTimePrecisionStep
		for shift < 64 {
			shiftEncoded, err := numeric.NewPrefixCodedInt64(original, shift)
			if err != nil {
				break
			}
			token := analysis.Token{
				Start:    0,
				End:      len(shiftEncoded),
				Term:     shiftEncoded,
				Position: 1,
				Type:     analysis.DateTime,
			}
			tokens = append(tokens, &token)
			shift += DefaultDateTimePrecisionStep
		}
	}

	fieldLength := len(tokens)
	tokenFreqs := analysis.TokenFrequency(tokens, n.arrayPositions, n.options.IncludeTermVectors())
	return fieldLength, tokenFreqs
}

// Value returns the prefix-coded bytes.
func (n *DateTimeField) Value() []byte {
	return n.value
}

// DateTime decodes the stored value back into a UTC time.
func (n *DateTimeField) DateTime() (time.Time, error) {
	i64, err := n.value.Int64()
	if err != nil {
		return time.Time{}, err
	}
	return time.Unix(0, i64).UTC(), nil
}
||||
|
||||
func (n *DateTimeField) GoString() string { |
||||
return fmt.Sprintf("&document.DateField{Name:%s, Options: %s, Value: %s}", n.name, n.options, n.value) |
||||
} |
||||
|
||||
func (n *DateTimeField) NumPlainTextBytes() uint64 { |
||||
return n.numPlainTextBytes |
||||
} |
||||
|
||||
// NewDateTimeFieldFromBytes creates a DateTimeField over already
// prefix-coded bytes with the default datetime indexing options.
func NewDateTimeFieldFromBytes(name string, arrayPositions []uint64, value []byte) *DateTimeField {
	return &DateTimeField{
		name:              name,
		arrayPositions:    arrayPositions,
		value:             value,
		options:           DefaultDateTimeIndexingOptions,
		numPlainTextBytes: uint64(len(value)),
	}
}

// NewDateTimeField creates a DateTimeField for dt with the default datetime
// indexing options; it fails if dt is outside the representable range.
func NewDateTimeField(name string, arrayPositions []uint64, dt time.Time) (*DateTimeField, error) {
	return NewDateTimeFieldWithIndexingOptions(name, arrayPositions, dt, DefaultDateTimeIndexingOptions)
}

// NewDateTimeFieldWithIndexingOptions prefix-codes dt's UnixNano value;
// times outside the int64-nanosecond window are rejected with an error.
func NewDateTimeFieldWithIndexingOptions(name string, arrayPositions []uint64, dt time.Time, options IndexingOptions) (*DateTimeField, error) {
	if canRepresent(dt) {
		dtInt64 := dt.UnixNano()
		prefixCoded := numeric.MustNewPrefixCodedInt64(dtInt64, 0)
		return &DateTimeField{
			name:           name,
			arrayPositions: arrayPositions,
			value:          prefixCoded,
			options:        options,
			// not correct, just a place holder until we revisit how fields are
			// represented and can fix this better
			numPlainTextBytes: uint64(8),
		}, nil
	}
	return nil, fmt.Errorf("cannot represent %s in this type", dt)
}
||||
|
||||
func canRepresent(dt time.Time) bool { |
||||
if dt.Before(MinTimeRepresentable) || dt.After(MaxTimeRepresentable) { |
||||
return false |
||||
} |
||||
return true |
||||
} |
@ -0,0 +1,130 @@ |
||||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package document |
||||
|
||||
import ( |
||||
"fmt" |
||||
|
||||
"github.com/blevesearch/bleve/analysis" |
||||
"github.com/blevesearch/bleve/numeric" |
||||
) |
||||
|
||||
const DefaultNumericIndexingOptions = StoreField | IndexField |
||||
|
||||
const DefaultPrecisionStep uint = 4 |
||||
|
||||
type NumericField struct { |
||||
name string |
||||
arrayPositions []uint64 |
||||
options IndexingOptions |
||||
value numeric.PrefixCoded |
||||
numPlainTextBytes uint64 |
||||
} |
||||
|
||||
func (n *NumericField) Name() string { |
||||
return n.name |
||||
} |
||||
|
||||
func (n *NumericField) ArrayPositions() []uint64 { |
||||
return n.arrayPositions |
||||
} |
||||
|
||||
func (n *NumericField) Options() IndexingOptions { |
||||
return n.options |
||||
} |
||||
|
||||
func (n *NumericField) Analyze() (int, analysis.TokenFrequencies) { |
||||
tokens := make(analysis.TokenStream, 0) |
||||
tokens = append(tokens, &analysis.Token{ |
||||
Start: 0, |
||||
End: len(n.value), |
||||
Term: n.value, |
||||
Position: 1, |
||||
Type: analysis.Numeric, |
||||
}) |
||||
|
||||
original, err := n.value.Int64() |
||||
if err == nil { |
||||
|
||||
shift := DefaultPrecisionStep |
||||
for shift < 64 { |
||||
shiftEncoded, err := numeric.NewPrefixCodedInt64(original, shift) |
||||
if err != nil { |
||||
break |
||||
} |
||||
token := analysis.Token{ |
||||
Start: 0, |
||||
End: len(shiftEncoded), |
||||
Term: shiftEncoded, |
||||
Position: 1, |
||||
Type: analysis.Numeric, |
||||
} |
||||
tokens = append(tokens, &token) |
||||
shift += DefaultPrecisionStep |
||||
} |
||||
} |
||||
|
||||
fieldLength := len(tokens) |
||||
tokenFreqs := analysis.TokenFrequency(tokens, n.arrayPositions, n.options.IncludeTermVectors()) |
||||
return fieldLength, tokenFreqs |
||||
} |
||||
|
||||
// Value returns the prefix-coded bytes of the numeric value.
func (n *NumericField) Value() []byte {
	return n.value
}

// Number decodes the prefix-coded value back into the original float64.
func (n *NumericField) Number() (float64, error) {
	i64, err := n.value.Int64()
	if err != nil {
		return 0.0, err
	}
	return numeric.Int64ToFloat64(i64), nil
}

// GoString implements fmt.GoStringer for debugging output.
func (n *NumericField) GoString() string {
	return fmt.Sprintf("&document.NumericField{Name:%s, Options: %s, Value: %s}", n.name, n.options, n.value)
}

// NumPlainTextBytes returns the plain-text size estimate for this field,
// used for indexing statistics.
func (n *NumericField) NumPlainTextBytes() uint64 {
	return n.numPlainTextBytes
}
||||
|
||||
// NewNumericFieldFromBytes creates a NumericField directly from bytes that
// are already prefix-coded, using the default indexing options.
func NewNumericFieldFromBytes(name string, arrayPositions []uint64, value []byte) *NumericField {
	return &NumericField{
		name:              name,
		arrayPositions:    arrayPositions,
		value:             value,
		options:           DefaultNumericIndexingOptions,
		numPlainTextBytes: uint64(len(value)),
	}
}

// NewNumericField creates a NumericField for number using the default
// indexing options.
func NewNumericField(name string, arrayPositions []uint64, number float64) *NumericField {
	return NewNumericFieldWithIndexingOptions(name, arrayPositions, number, DefaultNumericIndexingOptions)
}

// NewNumericFieldWithIndexingOptions creates a NumericField for number
// with the given indexing options, prefix-coding the value at full
// precision (shift 0).
func NewNumericFieldWithIndexingOptions(name string, arrayPositions []uint64, number float64, options IndexingOptions) *NumericField {
	numberInt64 := numeric.Float64ToInt64(number)
	prefixCoded := numeric.MustNewPrefixCodedInt64(numberInt64, 0)
	return &NumericField{
		name:           name,
		arrayPositions: arrayPositions,
		value:          prefixCoded,
		options:        options,
		// not correct, just a place holder until we revisit how fields are
		// represented and can fix this better
		numPlainTextBytes: uint64(8),
	}
}
@ -0,0 +1,119 @@ |
||||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package document |
||||
|
||||
import ( |
||||
"fmt" |
||||
|
||||
"github.com/blevesearch/bleve/analysis" |
||||
) |
||||
|
||||
// DefaultTextIndexingOptions are the indexing options applied to text
// fields when the caller does not specify any: index only, not stored.
const DefaultTextIndexingOptions = IndexField

// TextField holds a single text value of a document, optionally paired
// with the analyzer used to tokenize it.
type TextField struct {
	name              string             // field name within the document
	arrayPositions    []uint64           // positions of this value inside nested arrays
	options           IndexingOptions    // index/store/term-vector flags
	analyzer          *analysis.Analyzer // nil means the value is indexed as a single token
	value             []byte             // raw text bytes
	numPlainTextBytes uint64             // plain-text size, used for stats
}
||||
|
||||
// Name returns the name of this field within its document.
func (t *TextField) Name() string {
	return t.name
}

// ArrayPositions returns the positions of this value inside any enclosing
// arrays, outermost first.
func (t *TextField) ArrayPositions() []uint64 {
	return t.arrayPositions
}

// Options returns the indexing options in effect for this field.
func (t *TextField) Options() IndexingOptions {
	return t.options
}
||||
|
||||
func (t *TextField) Analyze() (int, analysis.TokenFrequencies) { |
||||
var tokens analysis.TokenStream |
||||
if t.analyzer != nil { |
||||
bytesToAnalyze := t.Value() |
||||
if t.options.IsStored() { |
||||
// need to copy
|
||||
bytesCopied := make([]byte, len(bytesToAnalyze)) |
||||
copy(bytesCopied, bytesToAnalyze) |
||||
bytesToAnalyze = bytesCopied |
||||
} |
||||
tokens = t.analyzer.Analyze(bytesToAnalyze) |
||||
} else { |
||||
tokens = analysis.TokenStream{ |
||||
&analysis.Token{ |
||||
Start: 0, |
||||
End: len(t.value), |
||||
Term: t.value, |
||||
Position: 1, |
||||
Type: analysis.AlphaNumeric, |
||||
}, |
||||
} |
||||
} |
||||
fieldLength := len(tokens) // number of tokens in this doc field
|
||||
tokenFreqs := analysis.TokenFrequency(tokens, t.arrayPositions, t.options.IncludeTermVectors()) |
||||
return fieldLength, tokenFreqs |
||||
} |
||||
|
||||
// Value returns the raw text bytes of this field.
func (t *TextField) Value() []byte {
	return t.value
}

// GoString implements fmt.GoStringer for debugging output.
func (t *TextField) GoString() string {
	return fmt.Sprintf("&document.TextField{Name:%s, Options: %s, Analyzer: %v, Value: %s, ArrayPositions: %v}", t.name, t.options, t.analyzer, t.value, t.arrayPositions)
}

// NumPlainTextBytes returns the plain-text size of this field, used for
// indexing statistics.
func (t *TextField) NumPlainTextBytes() uint64 {
	return t.numPlainTextBytes
}
||||
|
||||
func NewTextField(name string, arrayPositions []uint64, value []byte) *TextField { |
||||
return NewTextFieldWithIndexingOptions(name, arrayPositions, value, DefaultTextIndexingOptions) |
||||
} |
||||
|
||||
func NewTextFieldWithIndexingOptions(name string, arrayPositions []uint64, value []byte, options IndexingOptions) *TextField { |
||||
return &TextField{ |
||||
name: name, |
||||
arrayPositions: arrayPositions, |
||||
options: options, |
||||
value: value, |
||||
numPlainTextBytes: uint64(len(value)), |
||||
} |
||||
} |
||||
|
||||
func NewTextFieldWithAnalyzer(name string, arrayPositions []uint64, value []byte, analyzer *analysis.Analyzer) *TextField { |
||||
return &TextField{ |
||||
name: name, |
||||
arrayPositions: arrayPositions, |
||||
options: DefaultTextIndexingOptions, |
||||
analyzer: analyzer, |
||||
value: value, |
||||
numPlainTextBytes: uint64(len(value)), |
||||
} |
||||
} |
||||
|
||||
func NewTextFieldCustom(name string, arrayPositions []uint64, value []byte, options IndexingOptions, analyzer *analysis.Analyzer) *TextField { |
||||
return &TextField{ |
||||
name: name, |
||||
arrayPositions: arrayPositions, |
||||
options: options, |
||||
analyzer: analyzer, |
||||
value: value, |
||||
numPlainTextBytes: uint64(len(value)), |
||||
} |
||||
} |
@ -0,0 +1,55 @@ |
||||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package document |
||||
|
||||
// IndexingOptions is a bit set controlling how a field is handled at
// indexing time: whether it is searchable, whether its original value is
// stored, and whether term vectors are recorded for it.
type IndexingOptions int

const (
	// IndexField makes the field searchable.
	IndexField IndexingOptions = 1 << iota
	// StoreField keeps the original field value in the index.
	StoreField
	// IncludeTermVectors records term vectors for the field.
	IncludeTermVectors
)

// IsIndexed reports whether the field is searchable.
func (o IndexingOptions) IsIndexed() bool {
	return o&IndexField != 0
}

// IsStored reports whether the original field value is stored.
func (o IndexingOptions) IsStored() bool {
	return o&StoreField != 0
}

// IncludeTermVectors reports whether term vectors are recorded.
func (o IndexingOptions) IncludeTermVectors() bool {
	return o&IncludeTermVectors != 0
}

// String renders the enabled options as a comma-separated list, for
// example "INDEXED, STORE, TV". It returns "" when no option is set.
func (o IndexingOptions) String() string {
	out := ""
	add := func(enabled bool, label string) {
		if !enabled {
			return
		}
		if out != "" {
			out += ", "
		}
		out += label
	}
	add(o.IsIndexed(), "INDEXED")
	add(o.IsStored(), "STORE")
	add(o.IncludeTermVectors(), "TV")
	return out
}
@ -0,0 +1,52 @@ |
||||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package bleve |
||||
|
||||
// Constant Error values which can be compared to determine the type of error
|
||||
// Error is a strongly typed bleve error code, allowing callers to detect
// and handle specific failure conditions by comparing against the
// constants below.
type Error int

// Constant Error values which can be compared to determine the type of error.
const (
	ErrorIndexPathExists Error = iota
	ErrorIndexPathDoesNotExist
	ErrorIndexMetaMissing
	ErrorIndexMetaCorrupt
	ErrorUnknownStorageType
	ErrorIndexClosed
	ErrorAliasMulti
	ErrorAliasEmpty
	ErrorUnknownIndexType
	ErrorEmptyID
	ErrorIndexReadInconsistency
)

// errorMessages maps each Error code to its human-readable message.
var errorMessages = map[Error]string{
	ErrorIndexPathExists:        "cannot create new index, path already exists",
	ErrorIndexPathDoesNotExist:  "cannot open index, path does not exist",
	ErrorIndexMetaMissing:       "cannot open index, metadata missing",
	ErrorIndexMetaCorrupt:       "cannot open index, metadata corrupt",
	ErrorUnknownStorageType:     "unknown storage type",
	ErrorIndexClosed:            "index is closed",
	ErrorAliasMulti:             "cannot perform single index operation on multiple index alias",
	ErrorAliasEmpty:             "cannot perform operation on empty alias",
	ErrorUnknownIndexType:       "unknown index type",
	ErrorEmptyID:                "document ID cannot be empty",
	ErrorIndexReadInconsistency: "index read inconsistency detected",
}

// Error implements the error interface, returning the message associated
// with this code (the empty string for an unknown code).
func (e Error) Error() string {
	return errorMessages[e]
}
@ -0,0 +1,243 @@ |
||||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package bleve |
||||
|
||||
import ( |
||||
"github.com/blevesearch/bleve/document" |
||||
"github.com/blevesearch/bleve/index" |
||||
"github.com/blevesearch/bleve/index/store" |
||||
"github.com/blevesearch/bleve/mapping" |
||||
"golang.org/x/net/context" |
||||
) |
||||
|
||||
// A Batch groups together multiple Index and Delete
// operations you would like performed at the same
// time. The Batch structure is NOT thread-safe.
// You should only perform operations on a batch
// from a single thread at a time. Once batch
// execution has started, you may not modify it.
type Batch struct {
	index    Index        // the index this batch will be applied to
	internal *index.Batch // accumulated operations in low-level form
}
||||
|
||||
// Index adds the specified index operation to the
|
||||
// batch. NOTE: the bleve Index is not updated
|
||||
// until the batch is executed.
|
||||
func (b *Batch) Index(id string, data interface{}) error { |
||||
if id == "" { |
||||
return ErrorEmptyID |
||||
} |
||||
doc := document.NewDocument(id) |
||||
err := b.index.Mapping().MapDocument(doc, data) |
||||
if err != nil { |
||||
return err |
||||
} |
||||
b.internal.Update(doc) |
||||
return nil |
||||
} |
||||
|
||||
// Delete adds the specified delete operation to the
|
||||
// batch. NOTE: the bleve Index is not updated until
|
||||
// the batch is executed.
|
||||
func (b *Batch) Delete(id string) { |
||||
if id != "" { |
||||
b.internal.Delete(id) |
||||
} |
||||
} |
||||
|
||||
// SetInternal adds the specified set internal
// operation to the batch. NOTE: the bleve Index is
// not updated until the batch is executed.
func (b *Batch) SetInternal(key, val []byte) {
	b.internal.SetInternal(key, val)
}

// DeleteInternal adds the specified delete internal
// operation to the batch. NOTE: the bleve Index is
// not updated until the batch is executed.
func (b *Batch) DeleteInternal(key []byte) {
	b.internal.DeleteInternal(key)
}
||||
|
||||
// Size returns the total number of operations inside the batch,
// including normal index operations and internal operations.
func (b *Batch) Size() int {
	return len(b.internal.IndexOps) + len(b.internal.InternalOps)
}

// String prints a user friendly string representation of what
// is inside this batch.
func (b *Batch) String() string {
	return b.internal.String()
}

// Reset returns a Batch to the empty state so that it can
// be re-used in the future.
func (b *Batch) Reset() {
	b.internal.Reset()
}
||||
|
||||
// An Index implements all the indexing and searching
|
||||
// capabilities of bleve. An Index can be created
|
||||
// using the New() and Open() methods.
|
||||
//
|
||||
// Index() takes an input value, deduces a DocumentMapping for its type,
|
||||
// assigns string paths to its fields or values then applies field mappings on
|
||||
// them.
|
||||
//
|
||||
// The DocumentMapping used to index a value is deduced by the following rules:
|
||||
// 1) If value implements Classifier interface, resolve the mapping from Type().
|
||||
// 2) If value has a string field or value at IndexMapping.TypeField.
|
||||
// (defaulting to "_type"), use it to resolve the mapping. Fields addressing
|
||||
// is described below.
|
||||
// 3) If IndexMapping.DefaultType is registered, return it.
|
||||
// 4) Return IndexMapping.DefaultMapping.
|
||||
//
|
||||
// Each field or nested field of the value is identified by a string path, then
|
||||
// mapped to one or several FieldMappings which extract the result for analysis.
|
||||
//
|
||||
// Struct values fields are identified by their "json:" tag, or by their name.
|
||||
// Nested fields are identified by prefixing with their parent identifier,
|
||||
// separated by a dot.
|
||||
//
|
||||
// Map values entries are identified by their string key. Entries not indexed
|
||||
// by strings are ignored. Entry values are identified recursively like struct
|
||||
// fields.
|
||||
//
|
||||
// Slice and array values are identified by their field name. Their elements
|
||||
// are processed sequentially with the same FieldMapping.
|
||||
//
|
||||
// String, float64 and time.Time values are identified by their field name.
|
||||
// Other types are ignored.
|
||||
//
|
||||
// Each value identifier is decomposed in its parts and recursively address
|
||||
// SubDocumentMappings in the tree starting at the root DocumentMapping. If a
|
||||
// mapping is found, all its FieldMappings are applied to the value. If no
|
||||
// mapping is found and the root DocumentMapping is dynamic, default mappings
|
||||
// are used based on value type and IndexMapping default configurations.
|
||||
//
|
||||
// Finally, mapped values are analyzed, indexed or stored. See
|
||||
// FieldMapping.Analyzer to know how an analyzer is resolved for a given field.
|
||||
//
|
||||
// Examples:
|
||||
//
|
||||
// type Date struct {
|
||||
// Day string `json:"day"`
|
||||
// Month string
|
||||
// Year string
|
||||
// }
|
||||
//
|
||||
// type Person struct {
|
||||
// FirstName string `json:"first_name"`
|
||||
// LastName string
|
||||
// BirthDate Date `json:"birth_date"`
|
||||
// }
|
||||
//
|
||||
// A Person value FirstName is mapped by the SubDocumentMapping at
|
||||
// "first_name". Its LastName is mapped by the one at "LastName". The day of
|
||||
// BirthDate is mapped to the SubDocumentMapping "day" of the root
|
||||
// SubDocumentMapping "birth_date". It will appear as the "birth_date.day"
|
||||
// field in the index. The month is mapped to "birth_date.Month".
|
||||
type Index interface {
	// Index analyzes, indexes or stores mapped data fields. Supplied
	// identifier is bound to analyzed data and will be retrieved by search
	// requests. See Index interface documentation for details about mapping
	// rules.
	Index(id string, data interface{}) error
	// Delete removes the document with the given identifier.
	Delete(id string) error

	// NewBatch creates an empty batch bound to this index.
	NewBatch() *Batch
	// Batch executes all operations accumulated in b.
	Batch(b *Batch) error

	// Document returns specified document or nil if the document is not
	// indexed or stored.
	Document(id string) (*document.Document, error)
	// DocCount returns the number of documents in the index.
	DocCount() (uint64, error)

	Search(req *SearchRequest) (*SearchResult, error)
	SearchInContext(ctx context.Context, req *SearchRequest) (*SearchResult, error)

	// Fields lists the names of the fields present in the index.
	Fields() ([]string, error)

	FieldDict(field string) (index.FieldDict, error)
	// FieldDictRange enumerates the terms of field between startTerm and
	// endTerm; FieldDictPrefix enumerates terms with the given prefix.
	FieldDictRange(field string, startTerm []byte, endTerm []byte) (index.FieldDict, error)
	FieldDictPrefix(field string, termPrefix []byte) (index.FieldDict, error)

	Close() error

	// Mapping returns the mapping used by this index.
	Mapping() mapping.IndexMapping

	Stats() *IndexStat
	StatsMap() map[string]interface{}

	GetInternal(key []byte) ([]byte, error)
	SetInternal(key, val []byte) error
	DeleteInternal(key []byte) error

	// Name returns the name of the index (by default this is the path)
	Name() string
	// SetName lets you assign your own logical name to this index
	SetName(string)

	// Advanced returns the indexer and data store, exposing lower level
	// methods to enumerate records and access data.
	Advanced() (index.Index, store.KVStore, error)
}
||||
|
||||
// New creates an index at the specified path, which must not already
// exist. The provided mapping will be used for all Index/Search
// operations.
func New(path string, mapping mapping.IndexMapping) (Index, error) {
	return newIndexUsing(path, mapping, Config.DefaultIndexType, Config.DefaultKVStore, nil)
}

// NewMemOnly creates a memory-only index.
// The contents of the index is NOT persisted,
// and will be lost once closed.
// The provided mapping will be used for all
// Index/Search operations.
func NewMemOnly(mapping mapping.IndexMapping) (Index, error) {
	return newIndexUsing("", mapping, Config.DefaultIndexType, Config.DefaultMemKVStore, nil)
}

// NewUsing creates an index at the specified path,
// which must not already exist.
// The provided mapping will be used for all
// Index/Search operations.
// The specified index type will be used.
// The specified kvstore implementation will be used
// and the provided kvconfig will be passed to its
// constructor. Note that currently the values of kvconfig must
// be able to be marshaled and unmarshaled using the encoding/json library (used
// when reading/writing the index metadata file).
func NewUsing(path string, mapping mapping.IndexMapping, indexType string, kvstore string, kvconfig map[string]interface{}) (Index, error) {
	return newIndexUsing(path, mapping, indexType, kvstore, kvconfig)
}

// Open opens an index at the specified path, which must exist.
// The mapping used when it was created will be used for all Index/Search operations.
func Open(path string) (Index, error) {
	return openIndexUsing(path, nil)
}

// OpenUsing opens an index at the specified path, which must exist.
// The mapping used when it was created will be used for all Index/Search operations.
// The provided runtimeConfig can override settings
// persisted when the kvstore was created.
func OpenUsing(path string, runtimeConfig map[string]interface{}) (Index, error) {
	return openIndexUsing(path, runtimeConfig)
}
@ -0,0 +1,83 @@ |
||||
// Copyright (c) 2015 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package index |
||||
|
||||
import "github.com/blevesearch/bleve/document" |
||||
|
||||
// IndexRow is a single key/value row destined for the underlying KV
// store. Implementations can report sizes up front and serialize into
// caller-provided buffers via KeyTo/ValueTo.
type IndexRow interface {
	KeySize() int
	KeyTo([]byte) (int, error)
	Key() []byte

	ValueSize() int
	ValueTo([]byte) (int, error)
	Value() []byte
}

// AnalysisResult carries the rows produced by analyzing one document.
type AnalysisResult struct {
	DocID string
	Rows  []IndexRow
}
||||
|
||||
// AnalysisWork is one unit of work for an analysis worker: analyze
// document d using index i and deliver the result on rc.
type AnalysisWork struct {
	i  Index
	d  *document.Document
	rc chan *AnalysisResult
}

// NewAnalysisWork wraps the given index, document and result channel
// into an AnalysisWork item.
func NewAnalysisWork(i Index, d *document.Document, rc chan *AnalysisResult) *AnalysisWork {
	return &AnalysisWork{
		i:  i,
		d:  d,
		rc: rc,
	}
}
||||
|
||||
// AnalysisQueue fans analysis work out to a fixed pool of worker
// goroutines.
type AnalysisQueue struct {
	queue chan *AnalysisWork // unbuffered; hands work directly to an idle worker
	done  chan struct{}      // closed to make all workers exit
}

// Queue submits one unit of work; it blocks until a worker accepts it.
func (q *AnalysisQueue) Queue(work *AnalysisWork) {
	q.queue <- work
}

// Close signals all workers to exit. It does not wait for them.
func (q *AnalysisQueue) Close() {
	close(q.done)
}

// NewAnalysisQueue creates a queue and starts numWorkers goroutines
// servicing it.
func NewAnalysisQueue(numWorkers int) *AnalysisQueue {
	rv := AnalysisQueue{
		queue: make(chan *AnalysisWork),
		done:  make(chan struct{}),
	}
	for i := 0; i < numWorkers; i++ {
		go AnalysisWorker(rv)
	}
	return &rv
}

// AnalysisWorker processes work items until the queue's done channel is
// closed. Results are sent back on each work item's own result channel.
func AnalysisWorker(q AnalysisQueue) {
	// read work off the queue
	for {
		select {
		case <-q.done:
			return
		case w := <-q.queue:
			r := w.i.Analyze(w.d)
			w.rc <- r
		}
	}
}
@ -0,0 +1,88 @@ |
||||
// Copyright (c) 2015 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package index |
||||
|
||||
import ( |
||||
"sync" |
||||
) |
||||
|
||||
// FieldCache maintains a bidirectional, thread-safe mapping between field
// names and their numeric field indexes.
type FieldCache struct {
	fieldIndexes   map[string]uint16 // name -> index
	indexFields    []string          // index -> name (may carry unused tail capacity)
	lastFieldIndex int               // highest index assigned so far; -1 when empty
	mutex          sync.RWMutex
}

// NewFieldCache returns an empty FieldCache ready for use.
func NewFieldCache() *FieldCache {
	return &FieldCache{
		fieldIndexes:   make(map[string]uint16),
		lastFieldIndex: -1,
	}
}

// AddExisting records an already-known field/index pair.
func (f *FieldCache) AddExisting(field string, index uint16) {
	f.mutex.Lock()
	defer f.mutex.Unlock()
	f.addLOCKED(field, index)
}

// addLOCKED stores the pair in both directions. Callers must hold the
// write lock.
func (f *FieldCache) addLOCKED(field string, index uint16) uint16 {
	f.fieldIndexes[field] = index
	pos := int(index)
	if pos >= len(f.indexFields) {
		// Grow the reverse slice with headroom to limit reallocations.
		grown := make([]string, pos+16)
		copy(grown, f.indexFields)
		f.indexFields = grown
	}
	f.indexFields[pos] = field
	if pos > f.lastFieldIndex {
		f.lastFieldIndex = pos
	}
	return index
}

// FieldNamed returns the index of the field, and whether it existed
// before this call. When createIfMissing is true a new index is assigned
// to an unknown field, but the second return value is still false.
func (f *FieldCache) FieldNamed(field string, createIfMissing bool) (uint16, bool) {
	f.mutex.RLock()
	if idx, known := f.fieldIndexes[field]; known {
		f.mutex.RUnlock()
		return idx, true
	}
	if !createIfMissing {
		f.mutex.RUnlock()
		return 0, false
	}
	// Trade the read lock for the write lock, then re-check: another
	// goroutine may have registered the field in between.
	f.mutex.RUnlock()
	f.mutex.Lock()
	defer f.mutex.Unlock()
	if idx, known := f.fieldIndexes[field]; known {
		return idx, true
	}
	// Assign the next free field id.
	return f.addLOCKED(field, uint16(f.lastFieldIndex+1)), false
}

// FieldIndexed returns the name registered for index, or "" if none.
func (f *FieldCache) FieldIndexed(index uint16) (field string) {
	f.mutex.RLock()
	defer f.mutex.RUnlock()
	if int(index) < len(f.indexFields) {
		field = f.indexFields[int(index)]
	}
	return field
}
@ -0,0 +1,239 @@ |
||||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package index |
||||
|
||||
import ( |
||||
"bytes" |
||||
"encoding/json" |
||||
"fmt" |
||||
|
||||
"github.com/blevesearch/bleve/document" |
||||
"github.com/blevesearch/bleve/index/store" |
||||
) |
||||
|
||||
// ErrorUnknownStorageType indicates an unrecognized KV storage type.
var ErrorUnknownStorageType = fmt.Errorf("unknown storage type")
||||
|
||||
// Index is the low-level indexing engine behind a bleve index: it turns
// documents into rows and manages their lifecycle in the KV store.
type Index interface {
	Open() error
	Close() error

	Update(doc *document.Document) error
	Delete(id string) error
	Batch(batch *Batch) error

	SetInternal(key, val []byte) error
	DeleteInternal(key []byte) error

	// Reader returns a low-level accessor on the index data. Close it to
	// release associated resources.
	Reader() (IndexReader, error)

	Stats() json.Marshaler
	StatsMap() map[string]interface{}

	// Analyze computes the index rows for a single document.
	Analyze(d *document.Document) *AnalysisResult

	// Advanced exposes the underlying KV store.
	Advanced() (store.KVStore, error)
}
||||
|
||||
// IndexReader is a read-only view of the index. Close it to release
// associated resources.
type IndexReader interface {
	// TermFieldReader enumerates the documents containing term in field,
	// optionally including frequency, norm and term-vector details.
	TermFieldReader(term []byte, field string, includeFreq, includeNorm, includeTermVectors bool) (TermFieldReader, error)

	// DocIDReader returns an iterator over all doc ids
	// The caller must close returned instance to release associated resources.
	DocIDReaderAll() (DocIDReader, error)

	// DocIDReaderOnly iterates only the listed document ids.
	DocIDReaderOnly(ids []string) (DocIDReader, error)

	FieldDict(field string) (FieldDict, error)

	// FieldDictRange is currently defined to include the start and end terms
	FieldDictRange(field string, startTerm []byte, endTerm []byte) (FieldDict, error)
	FieldDictPrefix(field string, termPrefix []byte) (FieldDict, error)

	Document(id string) (*document.Document, error)
	DocumentFieldTerms(id IndexInternalID, fields []string) (FieldTerms, error)

	Fields() ([]string, error)

	GetInternal(key []byte) ([]byte, error)

	DocCount() (uint64, error)

	// ExternalID and InternalID convert between user-visible document IDs
	// and the index's internal identifiers.
	ExternalID(id IndexInternalID) (string, error)
	InternalID(id string) (IndexInternalID, error)

	DumpAll() chan interface{}
	DumpDoc(id string) chan interface{}
	DumpFields() chan interface{}

	Close() error
}
||||
|
||||
// FieldTerms records, per field name, the list of terms a document uses
// in that field.
type FieldTerms map[string][]string

// FieldsNotYetCached filters fields down to those that have no entry in
// f, preserving their order.
func (f FieldTerms) FieldsNotYetCached(fields []string) []string {
	missing := make([]string, 0, len(fields))
	for _, name := range fields {
		if _, cached := f[name]; cached {
			continue
		}
		missing = append(missing, name)
	}
	return missing
}

// Merge folds other into f. Term lists are assumed complete, so an entry
// from other simply replaces any existing entry for the same field.
func (f FieldTerms) Merge(other FieldTerms) {
	for name, terms := range other {
		f[name] = terms
	}
}
||||
|
||||
// TermFieldVector describes one occurrence of a term within a field:
// its token position, byte offsets, and the array positions of the
// containing value.
type TermFieldVector struct {
	Field          string
	ArrayPositions []uint64
	Pos            uint64
	Start          uint64
	End            uint64
}
||||
|
||||
// IndexInternalID is an opaque document identifier internal to the index
// implementation.
type IndexInternalID []byte

// Equals reports whether id and other are byte-for-byte identical.
func (id IndexInternalID) Equals(other IndexInternalID) bool {
	return id.Compare(other) == 0
}

// Compare orders ids lexicographically, returning -1, 0 or +1 as
// bytes.Compare does.
func (id IndexInternalID) Compare(other IndexInternalID) int {
	return bytes.Compare(id, other)
}
||||
|
||||
// TermFieldDoc reports one document containing a term in a field, along
// with its frequency, normalization factor and (optionally) term vectors.
type TermFieldDoc struct {
	Term    string
	ID      IndexInternalID
	Freq    uint64
	Norm    float64
	Vectors []*TermFieldVector
}

// Reset allows an already allocated TermFieldDoc to be reused
func (tfd *TermFieldDoc) Reset() *TermFieldDoc {
	// remember the []byte used for the ID
	id := tfd.ID
	// idiom to copy over from empty TermFieldDoc (0 allocations)
	*tfd = TermFieldDoc{}
	// reuse the []byte already allocated (and reset len to 0)
	tfd.ID = id[:0]
	return tfd
}
||||
|
||||
// TermFieldReader is the interface exposing the enumeration of documents
// containing a given term in a given field. Documents are returned in byte
// lexicographic order over their identifiers.
type TermFieldReader interface {
	// Next returns the next document containing the term in this field, or nil
	// when it reaches the end of the enumeration. The preAlloced TermFieldDoc
	// is optional, and when non-nil, will be used instead of allocating memory.
	Next(preAlloced *TermFieldDoc) (*TermFieldDoc, error)

	// Advance resets the enumeration at specified document or its immediate
	// follower.
	Advance(ID IndexInternalID, preAlloced *TermFieldDoc) (*TermFieldDoc, error)

	// Count returns the number of documents containing the term in this field.
	Count() uint64
	Close() error
}
||||
|
||||
type DictEntry struct { |
||||
Term string |
||||
Count uint64 |
||||
} |
||||
|
||||
// FieldDict enumerates the DictEntry values of a field's dictionary.
// Close the dict to release associated resources.
type FieldDict interface {
	// Next returns the next entry, or nil at the end of the enumeration.
	Next() (*DictEntry, error)
	Close() error
}
||||
|
||||
// DocIDReader is the interface exposing enumeration of documents identifiers.
// Close the reader to release associated resources.
type DocIDReader interface {
	// Next returns the next document internal identifier in the natural
	// index order, nil when the end of the sequence is reached.
	Next() (IndexInternalID, error)

	// Advance resets the iteration to the first internal identifier greater than
	// or equal to ID. If ID is smaller than the start of the range, the iteration
	// will start there instead. If ID is greater than or equal to the end of
	// the range, Next() call will return io.EOF.
	Advance(ID IndexInternalID) (IndexInternalID, error)

	// Close releases any resources held by the reader.
	Close() error
}
||||
|
||||
// Batch collects index mutations to be applied together. IndexOps maps a
// document ID to the document to index (a nil document marks a delete);
// InternalOps maps an internal key to its value (nil marks a delete).
type Batch struct {
	IndexOps    map[string]*document.Document
	InternalOps map[string][]byte
}
||||
|
||||
func NewBatch() *Batch { |
||||
return &Batch{ |
||||
IndexOps: make(map[string]*document.Document), |
||||
InternalOps: make(map[string][]byte), |
||||
} |
||||
} |
||||
|
||||
// Update queues doc to be (re)indexed under its own ID.
func (b *Batch) Update(doc *document.Document) {
	b.IndexOps[doc.ID] = doc
}
||||
|
||||
// Delete queues removal of the document with the given id, recorded as a
// nil entry in IndexOps.
func (b *Batch) Delete(id string) {
	b.IndexOps[id] = nil
}
||||
|
||||
// SetInternal queues storage of val under the given internal key.
func (b *Batch) SetInternal(key, val []byte) {
	b.InternalOps[string(key)] = val
}
||||
|
||||
// DeleteInternal queues removal of the given internal key, recorded as a
// nil entry in InternalOps.
func (b *Batch) DeleteInternal(key []byte) {
	b.InternalOps[string(key)] = nil
}
||||
|
||||
func (b *Batch) String() string { |
||||
rv := fmt.Sprintf("Batch (%d ops, %d internal ops)\n", len(b.IndexOps), len(b.InternalOps)) |
||||
for k, v := range b.IndexOps { |
||||
if v != nil { |
||||
rv += fmt.Sprintf("\tINDEX - '%s'\n", k) |
||||
} else { |
||||
rv += fmt.Sprintf("\tDELETE - '%s'\n", k) |
||||
} |
||||
} |
||||
for k, v := range b.InternalOps { |
||||
if v != nil { |
||||
rv += fmt.Sprintf("\tSET INTERNAL - '%s'\n", k) |
||||
} else { |
||||
rv += fmt.Sprintf("\tDELETE INTERNAL - '%s'\n", k) |
||||
} |
||||
} |
||||
return rv |
||||
} |
||||
|
||||
func (b *Batch) Reset() { |
||||
b.IndexOps = make(map[string]*document.Document) |
||||
b.InternalOps = make(map[string][]byte) |
||||
} |
@ -0,0 +1,62 @@ |
||||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package store |
||||
|
||||
// op is a single queued mutation: set K to V, or delete K when V is nil.
type op struct {
	K []byte
	V []byte
}
||||
|
||||
// EmulatedBatch implements KVBatch for stores without native batching:
// set/delete ops are recorded in order in Ops, merges accumulate in Merger.
type EmulatedBatch struct {
	Ops    []*op
	Merger *EmulatedMerge
}
||||
|
||||
func NewEmulatedBatch(mo MergeOperator) *EmulatedBatch { |
||||
return &EmulatedBatch{ |
||||
Ops: make([]*op, 0, 1000), |
||||
Merger: NewEmulatedMerge(mo), |
||||
} |
||||
} |
||||
|
||||
func (b *EmulatedBatch) Set(key, val []byte) { |
||||
ck := make([]byte, len(key)) |
||||
copy(ck, key) |
||||
cv := make([]byte, len(val)) |
||||
copy(cv, val) |
||||
b.Ops = append(b.Ops, &op{ck, cv}) |
||||
} |
||||
|
||||
func (b *EmulatedBatch) Delete(key []byte) { |
||||
ck := make([]byte, len(key)) |
||||
copy(ck, key) |
||||
b.Ops = append(b.Ops, &op{ck, nil}) |
||||
} |
||||
|
||||
func (b *EmulatedBatch) Merge(key, val []byte) { |
||||
ck := make([]byte, len(key)) |
||||
copy(ck, key) |
||||
cv := make([]byte, len(val)) |
||||
copy(cv, val) |
||||
b.Merger.Merge(key, val) |
||||
} |
||||
|
||||
// Reset truncates the op list in place, keeping its backing array for reuse.
// NOTE(review): Merger and any accumulated merges are NOT cleared here —
// confirm whether callers rely on that before changing it.
func (b *EmulatedBatch) Reset() {
	b.Ops = b.Ops[:0]
}
||||
|
||||
// Close is a no-op; an emulated batch holds no external resources.
func (b *EmulatedBatch) Close() error {
	return nil
}
@ -0,0 +1,85 @@ |
||||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package boltdb |
||||
|
||||
import ( |
||||
"bytes" |
||||
|
||||
"github.com/boltdb/bolt" |
||||
) |
||||
|
||||
// Iterator is a store.KVIterator over a single bolt bucket cursor,
// bounded either by a key prefix or by a [start, end) key range.
// key/val/valid cache the cursor's current position.
type Iterator struct {
	store  *Store
	tx     *bolt.Tx
	cursor *bolt.Cursor
	prefix []byte
	start  []byte
	end    []byte
	valid  bool
	key    []byte
	val    []byte
}
||||
|
||||
func (i *Iterator) updateValid() { |
||||
i.valid = (i.key != nil) |
||||
if i.valid { |
||||
if i.prefix != nil { |
||||
i.valid = bytes.HasPrefix(i.key, i.prefix) |
||||
} else if i.end != nil { |
||||
i.valid = bytes.Compare(i.key, i.end) < 0 |
||||
} |
||||
} |
||||
} |
||||
|
||||
func (i *Iterator) Seek(k []byte) { |
||||
if i.start != nil && bytes.Compare(k, i.start) < 0 { |
||||
k = i.start |
||||
} |
||||
if i.prefix != nil && !bytes.HasPrefix(k, i.prefix) { |
||||
if bytes.Compare(k, i.prefix) < 0 { |
||||
k = i.prefix |
||||
} else { |
||||
i.valid = false |
||||
return |
||||
} |
||||
} |
||||
i.key, i.val = i.cursor.Seek(k) |
||||
i.updateValid() |
||||
} |
||||
|
||||
// Next advances the cursor one key and recomputes validity.
func (i *Iterator) Next() {
	i.key, i.val = i.cursor.Next()
	i.updateValid()
}
||||
|
||||
// Current returns Key(), Value(), Valid() in a single operation.
func (i *Iterator) Current() ([]byte, []byte, bool) {
	return i.key, i.val, i.valid
}
||||
|
||||
// Key returns the current key; the bytes are only valid until the next
// cursor movement (bolt transaction-scoped memory).
func (i *Iterator) Key() []byte {
	return i.key
}
||||
|
||||
// Value returns the current value; the bytes are only valid until the
// next cursor movement (bolt transaction-scoped memory).
func (i *Iterator) Value() []byte {
	return i.val
}
||||
|
||||
// Valid reports whether the iterator currently points inside its bounds.
func (i *Iterator) Valid() bool {
	return i.valid
}
||||
|
||||
// Close is a no-op: the enclosing Reader owns the bolt transaction and
// releases it on its own Close.
func (i *Iterator) Close() error {
	return nil
}
@ -0,0 +1,73 @@ |
||||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package boltdb |
||||
|
||||
import ( |
||||
"github.com/blevesearch/bleve/index/store" |
||||
"github.com/boltdb/bolt" |
||||
) |
||||
|
||||
// Reader is an isolated store.KVReader backed by a read-only bolt
// transaction over a single bucket.
type Reader struct {
	store  *Store
	tx     *bolt.Tx
	bucket *bolt.Bucket
}
||||
|
||||
func (r *Reader) Get(key []byte) ([]byte, error) { |
||||
var rv []byte |
||||
v := r.bucket.Get(key) |
||||
if v != nil { |
||||
rv = make([]byte, len(v)) |
||||
copy(rv, v) |
||||
} |
||||
return rv, nil |
||||
} |
||||
|
||||
// MultiGet retrieves multiple values in one call via the generic
// store.MultiGet helper (one Get per key).
func (r *Reader) MultiGet(keys [][]byte) ([][]byte, error) {
	return store.MultiGet(r, keys)
}
||||
|
||||
func (r *Reader) PrefixIterator(prefix []byte) store.KVIterator { |
||||
cursor := r.bucket.Cursor() |
||||
|
||||
rv := &Iterator{ |
||||
store: r.store, |
||||
tx: r.tx, |
||||
cursor: cursor, |
||||
prefix: prefix, |
||||
} |
||||
|
||||
rv.Seek(prefix) |
||||
return rv |
||||
} |
||||
|
||||
func (r *Reader) RangeIterator(start, end []byte) store.KVIterator { |
||||
cursor := r.bucket.Cursor() |
||||
|
||||
rv := &Iterator{ |
||||
store: r.store, |
||||
tx: r.tx, |
||||
cursor: cursor, |
||||
start: start, |
||||
end: end, |
||||
} |
||||
|
||||
rv.Seek(start) |
||||
return rv |
||||
} |
||||
|
||||
// Close ends the reader's transaction. Rollback is the canonical way to
// release a read-only bolt transaction.
func (r *Reader) Close() error {
	return r.tx.Rollback()
}
@ -0,0 +1,26 @@ |
||||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package boltdb |
||||
|
||||
import "encoding/json" |
||||
|
||||
// stats adapts the underlying bolt DB statistics to json.Marshaler for
// Store.Stats().
type stats struct {
	s *Store
}
||||
|
||||
// MarshalJSON serializes the live bolt.DB.Stats() snapshot as JSON.
func (s *stats) MarshalJSON() ([]byte, error) {
	bs := s.s.db.Stats()
	return json.Marshal(bs)
}
@ -0,0 +1,175 @@ |
||||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
// Package boltdb implements a store.KVStore on top of BoltDB. It supports the
|
||||
// following options:
|
||||
//
|
||||
// "bucket" (string): the name of BoltDB bucket to use, defaults to "bleve".
|
||||
//
|
||||
// "nosync" (bool): if true, set boltdb.DB.NoSync to true. It speeds up index
|
||||
// operations in exchange of losing integrity guarantees if indexation aborts
|
||||
// without closing the index. Use it when rebuilding indexes from zero.
|
||||
package boltdb |
||||
|
||||
import ( |
||||
"bytes" |
||||
"encoding/json" |
||||
"fmt" |
||||
"os" |
||||
|
||||
"github.com/blevesearch/bleve/index/store" |
||||
"github.com/blevesearch/bleve/registry" |
||||
"github.com/boltdb/bolt" |
||||
) |
||||
|
||||
const (
	// Name is the key under which this KVStore is registered.
	Name = "boltdb"
	// defaultCompactBatchSize bounds deletions per transaction in Compact.
	defaultCompactBatchSize = 100
)
||||
|
||||
// Store is a store.KVStore backed by a single BoltDB file, keeping all
// data in one named bucket. See the package comment for config options.
type Store struct {
	path        string
	bucket      string
	db          *bolt.DB
	noSync      bool
	fillPercent float64
	mo          store.MergeOperator
}
||||
|
||||
// New creates a boltdb-backed KVStore. Required config: "path" (string,
// non-empty). Optional: "bucket" (string, default "bleve"), "nosync"
// (bool), "fillPercent" (float64, default bolt.DefaultFillPercent) and
// "read_only" (bool). The bucket is created up front unless read-only.
func New(mo store.MergeOperator, config map[string]interface{}) (store.KVStore, error) {
	path, ok := config["path"].(string)
	if !ok {
		return nil, fmt.Errorf("must specify path")
	}
	if path == "" {
		return nil, os.ErrInvalid
	}

	bucket, ok := config["bucket"].(string)
	if !ok {
		bucket = "bleve"
	}

	noSync, _ := config["nosync"].(bool)

	fillPercent, ok := config["fillPercent"].(float64)
	if !ok {
		fillPercent = bolt.DefaultFillPercent
	}

	bo := &bolt.Options{}
	ro, ok := config["read_only"].(bool)
	if ok {
		bo.ReadOnly = ro
	}

	db, err := bolt.Open(path, 0600, bo)
	if err != nil {
		return nil, err
	}
	db.NoSync = noSync

	// creating the bucket needs a writable transaction, so skip when
	// the DB was opened read-only
	if !bo.ReadOnly {
		err = db.Update(func(tx *bolt.Tx) error {
			_, err := tx.CreateBucketIfNotExists([]byte(bucket))

			return err
		})
		if err != nil {
			return nil, err
		}
	}

	rv := Store{
		path:        path,
		bucket:      bucket,
		db:          db,
		mo:          mo,
		noSync:      noSync,
		fillPercent: fillPercent,
	}
	return &rv, nil
}
||||
|
||||
// Close closes the underlying bolt database file.
func (bs *Store) Close() error {
	return bs.db.Close()
}
||||
|
||||
func (bs *Store) Reader() (store.KVReader, error) { |
||||
tx, err := bs.db.Begin(false) |
||||
if err != nil { |
||||
return nil, err |
||||
} |
||||
return &Reader{ |
||||
store: bs, |
||||
tx: tx, |
||||
bucket: tx.Bucket([]byte(bs.bucket)), |
||||
}, nil |
||||
} |
||||
|
||||
// Writer returns a store.KVWriter. No bolt transaction is opened here;
// ExecuteBatch opens one per batch.
func (bs *Store) Writer() (store.KVWriter, error) {
	return &Writer{
		store: bs,
	}, nil
}
||||
|
||||
// Stats returns a json.Marshaler exposing the bolt DB statistics.
func (bs *Store) Stats() json.Marshaler {
	return &stats{
		s: bs,
	}
}
||||
|
||||
// CompactWithBatchSize removes DictionaryTerm entries with a count of zero (in batchSize batches)
// Removing entries is a workaround for github issue #374.
func (bs *Store) CompactWithBatchSize(batchSize int) error {
	// repeat whole passes until one pass deletes nothing
	for {
		cnt := 0
		err := bs.db.Batch(func(tx *bolt.Tx) error {
			c := tx.Bucket([]byte(bs.bucket)).Cursor()
			// keys starting with "d" hold the dictionary entries this
			// compaction targets (per the doc comment above)
			prefix := []byte("d")

			for k, v := c.Seek(prefix); bytes.HasPrefix(k, prefix); k, v = c.Next() {
				// a value of the single byte 0 marks a zero-count entry
				if bytes.Equal(v, []byte{0}) {
					cnt++
					if err := c.Delete(); err != nil {
						return err
					}
					// cap deletions per transaction to bound its size
					if cnt == batchSize {
						break
					}
				}

			}
			return nil
		})
		if err != nil {
			return err
		}

		if cnt == 0 {
			break
		}
	}
	return nil
}
||||
|
||||
// Compact calls CompactWithBatchSize with a default batch size of 100. This is a workaround
// for github issue #374.
func (bs *Store) Compact() error {
	return bs.CompactWithBatchSize(defaultCompactBatchSize)
}
||||
|
||||
// init registers this implementation in the bleve KVStore registry
// under Name ("boltdb").
func init() {
	registry.RegisterKVStore(Name, New)
}
@ -0,0 +1,95 @@ |
||||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package boltdb |
||||
|
||||
import ( |
||||
"fmt" |
||||
|
||||
"github.com/blevesearch/bleve/index/store" |
||||
) |
||||
|
||||
// Writer is the store.KVWriter for the boltdb store; it holds no state
// beyond the store itself — each ExecuteBatch runs its own transaction.
type Writer struct {
	store *Store
}
||||
|
||||
// NewBatch returns an emulated batch (boltdb has no native batch type
// usable here); ops are replayed in ExecuteBatch.
func (w *Writer) NewBatch() store.KVBatch {
	return store.NewEmulatedBatch(w.store.mo)
}
||||
|
||||
// NewBatchEx returns a caller-usable buffer of options.TotalBytes plus an
// ordinary emulated batch; no pre-sizing beyond the buffer is done.
func (w *Writer) NewBatchEx(options store.KVBatchOptions) ([]byte, store.KVBatch, error) {
	return make([]byte, options.TotalBytes), w.NewBatch(), nil
}
||||
|
||||
// ExecuteBatch atomically applies the batch's merges, sets and deletes in
// a single writable bolt transaction. The named return err drives the
// deferred commit/rollback decision, which is why error paths below use
// bare returns after assigning err.
func (w *Writer) ExecuteBatch(batch store.KVBatch) (err error) {

	emulatedBatch, ok := batch.(*store.EmulatedBatch)
	if !ok {
		return fmt.Errorf("wrong type of batch")
	}

	tx, err := w.store.db.Begin(true)
	if err != nil {
		return
	}
	// defer function to ensure that once started,
	// we either Commit tx or Rollback
	defer func() {
		// if nothing went wrong, commit
		if err == nil {
			// careful to catch error here too
			err = tx.Commit()
		} else {
			// caller should see error that caused abort,
			// not success or failure of Rollback itself
			_ = tx.Rollback()
		}
	}()

	bucket := tx.Bucket([]byte(w.store.bucket))
	bucket.FillPercent = w.store.fillPercent

	// merges first: read the existing value, run the full merge, write back
	for k, mergeOps := range emulatedBatch.Merger.Merges {
		kb := []byte(k)
		existingVal := bucket.Get(kb)
		mergedVal, fullMergeOk := w.store.mo.FullMerge(kb, existingVal, mergeOps)
		if !fullMergeOk {
			err = fmt.Errorf("merge operator returned failure")
			return
		}
		err = bucket.Put(kb, mergedVal)
		if err != nil {
			return
		}
	}

	// then replay the recorded set/delete ops in order (nil V = delete)
	for _, op := range emulatedBatch.Ops {
		if op.V != nil {
			err = bucket.Put(op.K, op.V)
			if err != nil {
				return
			}
		} else {
			err = bucket.Delete(op.K)
			if err != nil {
				return
			}
		}
	}
	return
}
||||
|
||||
// Close is a no-op; the writer owns no resources of its own.
func (w *Writer) Close() error {
	return nil
}
@ -0,0 +1,152 @@ |
||||
// Copyright (c) 2015 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
// Package gtreap provides an in-memory implementation of the
|
||||
// KVStore interfaces using the gtreap balanced-binary treap,
|
||||
// copy-on-write data structure.
|
||||
package gtreap |
||||
|
||||
import ( |
||||
"bytes" |
||||
"sync" |
||||
|
||||
"github.com/steveyen/gtreap" |
||||
) |
||||
|
||||
// Iterator is a store.KVIterator over an immutable treap snapshot. A
// background goroutine (started by restart) streams items over nextCh;
// closing cancelCh stops it. The mutex guards the channel and cursor
// fields against concurrent Seek/Next/Close.
type Iterator struct {
	t *gtreap.Treap

	m        sync.Mutex
	cancelCh chan struct{}
	nextCh   chan *Item
	curr     *Item
	currOk   bool

	prefix []byte
	start  []byte
	end    []byte
}
||||
|
||||
// Seek repositions the iterator at the first item >= k, clamped to the
// configured start bound and prefix.
func (w *Iterator) Seek(k []byte) {
	if w.start != nil && bytes.Compare(k, w.start) < 0 {
		k = w.start
	}
	if w.prefix != nil && !bytes.HasPrefix(k, w.prefix) {
		if bytes.Compare(k, w.prefix) < 0 {
			// before the prefix range: jump to its first possible key
			k = w.prefix
		} else {
			// past the prefix range: compute the prefix's successor (the
			// prefix with its last non-0xff byte incremented, truncated
			// there) so Current() will report invalid from here on
			var end []byte
			for i := len(w.prefix) - 1; i >= 0; i-- {
				c := w.prefix[i]
				if c < 0xff {
					end = make([]byte, i+1)
					copy(end, w.prefix)
					end[i] = c + 1
					break
				}
			}
			// NOTE(review): if every prefix byte is 0xff, end stays nil
			// and the restart below runs from a nil key — confirm intended
			k = end
		}
	}
	w.restart(&Item{k: k})
}
||||
|
||||
// restart cancels any in-flight producer goroutine, spawns a new one that
// streams treap items >= start into a fresh channel, and advances to the
// first item. Returns w for chaining.
func (w *Iterator) restart(start *Item) *Iterator {
	cancelCh := make(chan struct{})
	nextCh := make(chan *Item, 1)

	// swap in the new channels under the lock; closing the old cancelCh
	// signals the previous producer goroutine (if any) to exit
	w.m.Lock()
	if w.cancelCh != nil {
		close(w.cancelCh)
	}
	w.cancelCh = cancelCh
	w.nextCh = nextCh
	w.curr = nil
	w.currOk = false
	w.m.Unlock()

	// producer: walk the treap ascending from start, pushing each item
	// until cancelled; closing nextCh lets Next() observe the end
	go func() {
		if start != nil {
			w.t.VisitAscend(start, func(itm gtreap.Item) bool {
				select {
				case <-cancelCh:
					return false
				case nextCh <- itm.(*Item):
					return true
				}
			})
		}
		close(nextCh)
	}()

	w.Next()

	return w
}
||||
|
||||
// Next receives the next item from the producer goroutine; currOk becomes
// false once the channel is closed (enumeration finished or cancelled).
// The channel itself is read outside the lock to avoid blocking Close.
func (w *Iterator) Next() {
	w.m.Lock()
	nextCh := w.nextCh
	w.m.Unlock()
	w.curr, w.currOk = <-nextCh
}
||||
|
||||
func (w *Iterator) Current() ([]byte, []byte, bool) { |
||||
w.m.Lock() |
||||
defer w.m.Unlock() |
||||
if !w.currOk || w.curr == nil { |
||||
return nil, nil, false |
||||
} |
||||
if w.prefix != nil && !bytes.HasPrefix(w.curr.k, w.prefix) { |
||||
return nil, nil, false |
||||
} else if w.end != nil && bytes.Compare(w.curr.k, w.end) >= 0 { |
||||
return nil, nil, false |
||||
} |
||||
return w.curr.k, w.curr.v, w.currOk |
||||
} |
||||
|
||||
// Key returns the current key, or nil when the position is invalid.
func (w *Iterator) Key() []byte {
	k, _, ok := w.Current()
	if !ok {
		return nil
	}
	return k
}
||||
|
||||
// Value returns the current value, or nil when the position is invalid.
func (w *Iterator) Value() []byte {
	_, v, ok := w.Current()
	if !ok {
		return nil
	}
	return v
}
||||
|
||||
// Valid reports whether the iterator currently points inside its bounds.
func (w *Iterator) Valid() bool {
	_, _, ok := w.Current()
	return ok
}
||||
|
||||
func (w *Iterator) Close() error { |
||||
w.m.Lock() |
||||
if w.cancelCh != nil { |
||||
close(w.cancelCh) |
||||
} |
||||
w.cancelCh = nil |
||||
w.nextCh = nil |
||||
w.curr = nil |
||||
w.currOk = false |
||||
w.m.Unlock() |
||||
|
||||
return nil |
||||
} |
@ -0,0 +1,66 @@ |
||||
// Copyright (c) 2015 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
// Package gtreap provides an in-memory implementation of the
|
||||
// KVStore interfaces using the gtreap balanced-binary treap,
|
||||
// copy-on-write data structure.
|
||||
package gtreap |
||||
|
||||
import ( |
||||
"github.com/blevesearch/bleve/index/store" |
||||
|
||||
"github.com/steveyen/gtreap" |
||||
) |
||||
|
||||
// Reader is an isolated store.KVReader over one immutable treap snapshot;
// the copy-on-write treap makes it naturally isolated from later writes.
type Reader struct {
	t *gtreap.Treap
}
||||
|
||||
func (w *Reader) Get(k []byte) (v []byte, err error) { |
||||
var rv []byte |
||||
itm := w.t.Get(&Item{k: k}) |
||||
if itm != nil { |
||||
rv = make([]byte, len(itm.(*Item).v)) |
||||
copy(rv, itm.(*Item).v) |
||||
return rv, nil |
||||
} |
||||
return nil, nil |
||||
} |
||||
|
||||
// MultiGet retrieves multiple values in one call via the generic
// store.MultiGet helper (one Get per key).
func (r *Reader) MultiGet(keys [][]byte) ([][]byte, error) {
	return store.MultiGet(r, keys)
}
||||
|
||||
func (w *Reader) PrefixIterator(k []byte) store.KVIterator { |
||||
rv := Iterator{ |
||||
t: w.t, |
||||
prefix: k, |
||||
} |
||||
rv.restart(&Item{k: k}) |
||||
return &rv |
||||
} |
||||
|
||||
func (w *Reader) RangeIterator(start, end []byte) store.KVIterator { |
||||
rv := Iterator{ |
||||
t: w.t, |
||||
start: start, |
||||
end: end, |
||||
} |
||||
rv.restart(&Item{k: start}) |
||||
return &rv |
||||
} |
||||
|
||||
// Close is a no-op; the reader only references an immutable snapshot.
func (w *Reader) Close() error {
	return nil
}
@ -0,0 +1,82 @@ |
||||
// Copyright (c) 2015 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
// Package gtreap provides an in-memory implementation of the
|
||||
// KVStore interfaces using the gtreap balanced-binary treap,
|
||||
// copy-on-write data structure.
|
||||
|
||||
package gtreap |
||||
|
||||
import ( |
||||
"bytes" |
||||
"fmt" |
||||
"os" |
||||
"sync" |
||||
|
||||
"github.com/blevesearch/bleve/index/store" |
||||
"github.com/blevesearch/bleve/registry" |
||||
"github.com/steveyen/gtreap" |
||||
) |
||||
|
||||
const Name = "gtreap" |
||||
|
||||
// Store is an in-memory store.KVStore: an immutable copy-on-write treap
// guarded by a mutex. Readers snapshot the current treap; writers swap in
// new treaps under the lock.
type Store struct {
	m  sync.Mutex
	t  *gtreap.Treap
	mo store.MergeOperator
}
||||
|
||||
// Item is one key/value pair stored in the treap.
type Item struct {
	k []byte
	v []byte
}
||||
|
||||
// itemCompare orders treap items by byte-wise comparison of their keys.
func itemCompare(a, b interface{}) int {
	return bytes.Compare(a.(*Item).k, b.(*Item).k)
}
||||
|
||||
// New creates an empty in-memory KVStore. The config must contain a
// "path" entry whose value is the empty string — a real path makes no
// sense for a purely in-memory store.
func New(mo store.MergeOperator, config map[string]interface{}) (store.KVStore, error) {
	path, ok := config["path"].(string)
	if !ok {
		return nil, fmt.Errorf("must specify path")
	}
	// reject a non-empty path: nothing is ever persisted
	if path != "" {
		return nil, os.ErrInvalid
	}

	rv := Store{
		t:  gtreap.NewTreap(itemCompare),
		mo: mo,
	}
	return &rv, nil
}
||||
|
||||
// Close is a no-op; the in-memory store holds no external resources.
func (s *Store) Close() error {
	return nil
}
||||
|
||||
func (s *Store) Reader() (store.KVReader, error) { |
||||
s.m.Lock() |
||||
t := s.t |
||||
s.m.Unlock() |
||||
return &Reader{t: t}, nil |
||||
} |
||||
|
||||
// Writer returns a store.KVWriter bound to this store.
func (s *Store) Writer() (store.KVWriter, error) {
	return &Writer{s: s}, nil
}
||||
|
||||
// init registers this implementation in the bleve KVStore registry
// under Name ("gtreap").
func init() {
	registry.RegisterKVStore(Name, New)
}
@ -0,0 +1,76 @@ |
||||
// Copyright (c) 2015 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
// Package gtreap provides an in-memory implementation of the
|
||||
// KVStore interfaces using the gtreap balanced-binary treap,
|
||||
// copy-on-write data structure.
|
||||
package gtreap |
||||
|
||||
import ( |
||||
"fmt" |
||||
"math/rand" |
||||
|
||||
"github.com/blevesearch/bleve/index/store" |
||||
) |
||||
|
||||
// Writer is the store.KVWriter for the gtreap store; batches are applied
// to the store's treap in ExecuteBatch under the store mutex.
type Writer struct {
	s *Store
}
||||
|
||||
// NewBatch returns an emulated batch; its ops are replayed by ExecuteBatch.
func (w *Writer) NewBatch() store.KVBatch {
	return store.NewEmulatedBatch(w.s.mo)
}
||||
|
||||
// NewBatchEx returns a caller-usable buffer of options.TotalBytes plus an
// ordinary emulated batch (this store has no native batching).
func (w *Writer) NewBatchEx(options store.KVBatchOptions) ([]byte, store.KVBatch, error) {
	return make([]byte, options.TotalBytes), w.NewBatch(), nil
}
||||
|
||||
func (w *Writer) ExecuteBatch(batch store.KVBatch) error { |
||||
|
||||
emulatedBatch, ok := batch.(*store.EmulatedBatch) |
||||
if !ok { |
||||
return fmt.Errorf("wrong type of batch") |
||||
} |
||||
|
||||
w.s.m.Lock() |
||||
for k, mergeOps := range emulatedBatch.Merger.Merges { |
||||
kb := []byte(k) |
||||
var existingVal []byte |
||||
existingItem := w.s.t.Get(&Item{k: kb}) |
||||
if existingItem != nil { |
||||
existingVal = w.s.t.Get(&Item{k: kb}).(*Item).v |
||||
} |
||||
mergedVal, fullMergeOk := w.s.mo.FullMerge(kb, existingVal, mergeOps) |
||||
if !fullMergeOk { |
||||
return fmt.Errorf("merge operator returned failure") |
||||
} |
||||
w.s.t = w.s.t.Upsert(&Item{k: kb, v: mergedVal}, rand.Int()) |
||||
} |
||||
|
||||
for _, op := range emulatedBatch.Ops { |
||||
if op.V != nil { |
||||
w.s.t = w.s.t.Upsert(&Item{k: op.K, v: op.V}, rand.Int()) |
||||
} else { |
||||
w.s.t = w.s.t.Delete(&Item{k: op.K}) |
||||
} |
||||
} |
||||
w.s.m.Unlock() |
||||
|
||||
return nil |
||||
} |
||||
|
||||
// Close detaches the writer from its store; using the writer afterwards
// will dereference the nil store pointer and panic.
func (w *Writer) Close() error {
	w.s = nil
	return nil
}
@ -0,0 +1,174 @@ |
||||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package store |
||||
|
||||
import "encoding/json" |
||||
|
||||
// KVStore is an abstraction for working with KV stores. Note that
// in order to be used with the bleve.registry, it must also implement
// a constructor function of the registry.KVStoreConstructor type.
type KVStore interface {

	// Writer returns a KVWriter which can be used to
	// make changes to the KVStore. If a writer cannot
	// be obtained a non-nil error is returned.
	Writer() (KVWriter, error)

	// Reader returns a KVReader which can be used to
	// read data from the KVStore. If a reader cannot
	// be obtained a non-nil error is returned.
	Reader() (KVReader, error)

	// Close closes the KVStore and releases any resources it holds.
	Close() error
}
||||
|
||||
// KVReader is an abstraction of an **ISOLATED** reader.
// In this context isolated is defined to mean that
// writes/deletes made after the KVReader is opened
// are not observed.
// Because there is usually a cost associated with
// keeping isolated readers active, users should
// close them as soon as they are no longer needed.
type KVReader interface {

	// Get returns the value associated with the key.
	// If the key does not exist, nil is returned.
	// The caller owns the bytes returned.
	Get(key []byte) ([]byte, error)

	// MultiGet retrieves multiple values in one call.
	MultiGet(keys [][]byte) ([][]byte, error)

	// PrefixIterator returns a KVIterator that will
	// visit all K/V pairs with the provided prefix.
	PrefixIterator(prefix []byte) KVIterator

	// RangeIterator returns a KVIterator that will
	// visit all K/V pairs >= start AND < end.
	RangeIterator(start, end []byte) KVIterator

	// Close closes the reader and releases its isolation resources.
	Close() error
}
||||
|
||||
// KVIterator is an abstraction around key iteration.
type KVIterator interface {

	// Seek will advance the iterator to the specified key
	// (or the first existing key following it).
	Seek(key []byte)

	// Next will advance the iterator to the next key.
	Next()

	// Key returns the key pointed to by the iterator.
	// The bytes returned are **ONLY** valid until the next call to Seek/Next/Close.
	// Continued use after that requires that they be copied.
	Key() []byte

	// Value returns the value pointed to by the iterator.
	// The bytes returned are **ONLY** valid until the next call to Seek/Next/Close.
	// Continued use after that requires that they be copied.
	Value() []byte

	// Valid returns whether or not the iterator is in a valid state.
	Valid() bool

	// Current returns Key(),Value(),Valid() in a single operation.
	Current() ([]byte, []byte, bool)

	// Close closes the iterator.
	Close() error
}
||||
|
||||
// KVWriter is an abstraction for mutating the KVStore.
// KVWriter does **NOT** enforce restrictions of a single writer;
// if the underlying KVStore allows concurrent writes, the
// KVWriter interface should also do so, it is up to the caller
// to do this in a way that is safe and makes sense.
type KVWriter interface {

	// NewBatch returns a KVBatch for performing batch operations on this kvstore.
	NewBatch() KVBatch

	// NewBatchEx returns a KVBatch and an associated byte array
	// that's pre-sized based on the KVBatchOptions. The caller can
	// use the returned byte array for keys and values associated with
	// the batch. Once the batch is either executed or closed, the
	// associated byte array should no longer be accessed by the
	// caller.
	NewBatchEx(KVBatchOptions) ([]byte, KVBatch, error)

	// ExecuteBatch will execute the KVBatch, the provided KVBatch **MUST** have
	// been created by the same KVStore (though not necessarily the same KVWriter).
	// Batch execution is atomic, either all the operations or none will be performed.
	ExecuteBatch(batch KVBatch) error

	// Close closes the writer.
	Close() error
}
||||
|
||||
// KVBatchOptions provides the KVWriter.NewBatchEx() method with batch
// preparation and preallocation information.
type KVBatchOptions struct {
	// TotalBytes is the sum of key and value bytes needed by the
	// caller for the entire batch. It affects the size of the
	// returned byte array of KVWriter.NewBatchEx().
	TotalBytes int

	// NumSets is the number of Set() calls the caller will invoke on
	// the KVBatch.
	NumSets int

	// NumDeletes is the number of Delete() calls the caller will invoke
	// on the KVBatch.
	NumDeletes int

	// NumMerges is the number of Merge() calls the caller will invoke
	// on the KVBatch.
	NumMerges int
}
||||
|
||||
// KVBatch is an abstraction for making multiple KV mutations at once
// Operations are buffered until the batch is passed to
// KVWriter.ExecuteBatch.
type KVBatch interface {

	// Set updates the key with the specified value
	// both key and value []byte may be reused as soon as this call returns
	Set(key, val []byte)

	// Delete removes the specified key
	// the key []byte may be reused as soon as this call returns
	Delete(key []byte)

	// Merge merges old value with the new value at the specified key
	// as prescribed by the KVStores merge operator
	// both key and value []byte may be reused as soon as this call returns
	Merge(key, val []byte)

	// Reset frees resources for this batch and allows reuse
	Reset()

	// Close frees resources
	Close() error
}
||||
|
||||
// KVStoreStats is an optional interface that KVStores can implement
// if they're able to report any useful stats
type KVStoreStats interface {
	// Stats returns a JSON serializable object representing stats for this KVStore
	Stats() json.Marshaler

	// StatsMap returns the same stats as a plain map, suitable for
	// merging into a larger stats structure.
	StatsMap() map[string]interface{}
}
@ -0,0 +1,64 @@ |
||||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package store |
||||
|
||||
// At the moment this happens to be the same interface as described by
// RocksDB, but this may not always be the case.

// MergeOperator describes how merge operands are combined with an
// existing value, both fully (at read/compaction time) and partially
// (operand with operand, before any value is involved).
type MergeOperator interface {

	// FullMerge the full sequence of operands on top of the existingValue
	// if no value currently exists, existingValue is nil
	// return the merged value, and success/failure
	FullMerge(key, existingValue []byte, operands [][]byte) ([]byte, bool)

	// Partially merge these two operands.
	// If partial merge cannot be done, return nil,false, which will defer
	// all processing until the FullMerge is done.
	PartialMerge(key, leftOperand, rightOperand []byte) ([]byte, bool)

	// Name returns an identifier for the operator
	Name() string
}
||||
|
||||
// EmulatedMerge buffers Merge() operands in memory, partially merging
// where the MergeOperator allows, so that a KVStore with no native
// merge facility can apply them later.
type EmulatedMerge struct {
	Merges map[string][][]byte // pending operands, keyed by string(key)
	mo     MergeOperator
}
||||
|
||||
func NewEmulatedMerge(mo MergeOperator) *EmulatedMerge { |
||||
return &EmulatedMerge{ |
||||
Merges: make(map[string][][]byte), |
||||
mo: mo, |
||||
} |
||||
} |
||||
|
||||
func (m *EmulatedMerge) Merge(key, val []byte) { |
||||
ops, ok := m.Merges[string(key)] |
||||
if ok && len(ops) > 0 { |
||||
last := ops[len(ops)-1] |
||||
mergedVal, partialMergeOk := m.mo.PartialMerge(key, last, val) |
||||
if partialMergeOk { |
||||
// replace last entry with the result of the merge
|
||||
ops[len(ops)-1] = mergedVal |
||||
} else { |
||||
// could not partial merge, append this to the end
|
||||
ops = append(ops, val) |
||||
} |
||||
} else { |
||||
ops = [][]byte{val} |
||||
} |
||||
m.Merges[string(key)] = ops |
||||
} |
@ -0,0 +1,33 @@ |
||||
// Copyright (c) 2016 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package store |
||||
|
||||
// MultiGet is a helper function to retrieve mutiple keys from a
|
||||
// KVReader, and might be used by KVStore implementations that don't
|
||||
// have a native multi-get facility.
|
||||
func MultiGet(kvreader KVReader, keys [][]byte) ([][]byte, error) { |
||||
vals := make([][]byte, 0, len(keys)) |
||||
|
||||
for i, key := range keys { |
||||
val, err := kvreader.Get(key) |
||||
if err != nil { |
||||
return nil, err |
||||
} |
||||
|
||||
vals[i] = val |
||||
} |
||||
|
||||
return vals, nil |
||||
} |
@ -0,0 +1,110 @@ |
||||
// Copyright (c) 2015 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package upsidedown |
||||
|
||||
import ( |
||||
"github.com/blevesearch/bleve/analysis" |
||||
"github.com/blevesearch/bleve/document" |
||||
"github.com/blevesearch/bleve/index" |
||||
) |
||||
|
||||
// Analyze converts a document into the full set of index rows that
// represent it (field rows, stored rows, term frequency rows, and the
// final back index row). It is pure computation; no KV store access
// happens here.
func (udc *UpsideDownCouch) Analyze(d *document.Document) *index.AnalysisResult {
	rv := &index.AnalysisResult{
		DocID: d.ID,
		Rows:  make([]index.IndexRow, 0, 100),
	}

	docIDBytes := []byte(d.ID)

	// track our back index entries
	backIndexStoredEntries := make([]*BackIndexStoreEntry, 0)

	// information we collate as we merge fields with same name
	fieldTermFreqs := make(map[uint16]analysis.TokenFrequencies)
	fieldLengths := make(map[uint16]int)
	fieldIncludeTermVectors := make(map[uint16]bool)
	fieldNames := make(map[uint16]string)

	// analyzeField folds one field's analysis output into the maps
	// above; storable is false for composite fields, which are never
	// stored.
	analyzeField := func(field document.Field, storable bool) {
		fieldIndex, newFieldRow := udc.fieldIndexOrNewRow(field.Name())
		if newFieldRow != nil {
			rv.Rows = append(rv.Rows, newFieldRow)
		}
		fieldNames[fieldIndex] = field.Name()

		if field.Options().IsIndexed() {
			fieldLength, tokenFreqs := field.Analyze()
			existingFreqs := fieldTermFreqs[fieldIndex]
			if existingFreqs == nil {
				fieldTermFreqs[fieldIndex] = tokenFreqs
			} else {
				// same field name seen again (array): merge frequencies
				existingFreqs.MergeAll(field.Name(), tokenFreqs)
				fieldTermFreqs[fieldIndex] = existingFreqs
			}
			fieldLengths[fieldIndex] += fieldLength
			fieldIncludeTermVectors[fieldIndex] = field.Options().IncludeTermVectors()
		}

		if storable && field.Options().IsStored() {
			rv.Rows, backIndexStoredEntries = udc.storeField(docIDBytes, field, fieldIndex, rv.Rows, backIndexStoredEntries)
		}
	}

	// walk all the fields, record stored fields now
	// place information about indexed fields into map
	// this collates information across fields with
	// same names (arrays)
	for _, field := range d.Fields {
		analyzeField(field, true)
	}

	if len(d.CompositeFields) > 0 {
		// feed every per-field frequency set into each composite field
		for fieldIndex, tokenFreqs := range fieldTermFreqs {
			// see if any of the composite fields need this
			for _, compositeField := range d.CompositeFields {
				compositeField.Compose(fieldNames[fieldIndex], fieldLengths[fieldIndex], tokenFreqs)
			}
		}

		for _, compositeField := range d.CompositeFields {
			analyzeField(compositeField, false)
		}
	}

	// grow rv.Rows once to its final capacity: roughly one row per
	// term per field, plus one for the back index row
	rowsCapNeeded := len(rv.Rows) + 1
	for _, tokenFreqs := range fieldTermFreqs {
		rowsCapNeeded += len(tokenFreqs)
	}

	rv.Rows = append(make([]index.IndexRow, 0, rowsCapNeeded), rv.Rows...)

	backIndexTermEntries := make([]*BackIndexTermEntry, 0, rowsCapNeeded)

	// walk through the collated information and process
	// once for each indexed field (unique name)
	for fieldIndex, tokenFreqs := range fieldTermFreqs {
		fieldLength := fieldLengths[fieldIndex]
		includeTermVectors := fieldIncludeTermVectors[fieldIndex]

		// encode this field
		rv.Rows, backIndexTermEntries = udc.indexField(docIDBytes, includeTermVectors, fieldIndex, fieldLength, tokenFreqs, rv.Rows, backIndexTermEntries)
	}

	// build the back index row
	backIndexRow := NewBackIndexRow(docIDBytes, backIndexTermEntries, backIndexStoredEntries)
	rv.Rows = append(rv.Rows, backIndexRow)

	return rv
}
@ -0,0 +1,8 @@ |
||||
#!/bin/sh

# Run each Go benchmark in this package individually (10s each) against
# the forestdb and leveldb backends, stripping the ok/PASS noise.
BENCHMARKS=$(grep "func Benchmark" *_test.go | sed 's/.*func //' | sed 's/(.*{//')

for BENCHMARK in $BENCHMARKS; do
    go test -v -run=xxx -bench="^${BENCHMARK}$" -benchtime=10s -tags 'forestdb leveldb' | grep -v ok | grep -v PASS
done
@ -0,0 +1,172 @@ |
||||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package upsidedown |
||||
|
||||
import ( |
||||
"bytes" |
||||
"sort" |
||||
|
||||
"github.com/blevesearch/bleve/index/store" |
||||
) |
||||
|
||||
// the functions in this file are only intended to be used by
|
||||
// the bleve_dump utility and the debug http handlers
|
||||
// if your application relies on them, you're doing something wrong
|
||||
// they may change or be removed at any time
|
||||
|
||||
func dumpPrefix(kvreader store.KVReader, rv chan interface{}, prefix []byte) { |
||||
start := prefix |
||||
if start == nil { |
||||
start = []byte{0} |
||||
} |
||||
it := kvreader.PrefixIterator(start) |
||||
defer func() { |
||||
cerr := it.Close() |
||||
if cerr != nil { |
||||
rv <- cerr |
||||
} |
||||
}() |
||||
key, val, valid := it.Current() |
||||
for valid { |
||||
ck := make([]byte, len(key)) |
||||
copy(ck, key) |
||||
cv := make([]byte, len(val)) |
||||
copy(cv, val) |
||||
row, err := ParseFromKeyValue(ck, cv) |
||||
if err != nil { |
||||
rv <- err |
||||
return |
||||
} |
||||
rv <- row |
||||
|
||||
it.Next() |
||||
key, val, valid = it.Current() |
||||
} |
||||
} |
||||
|
||||
func dumpRange(kvreader store.KVReader, rv chan interface{}, start, end []byte) { |
||||
it := kvreader.RangeIterator(start, end) |
||||
defer func() { |
||||
cerr := it.Close() |
||||
if cerr != nil { |
||||
rv <- cerr |
||||
} |
||||
}() |
||||
key, val, valid := it.Current() |
||||
for valid { |
||||
ck := make([]byte, len(key)) |
||||
copy(ck, key) |
||||
cv := make([]byte, len(val)) |
||||
copy(cv, val) |
||||
row, err := ParseFromKeyValue(ck, cv) |
||||
if err != nil { |
||||
rv <- err |
||||
return |
||||
} |
||||
rv <- row |
||||
|
||||
it.Next() |
||||
key, val, valid = it.Current() |
||||
} |
||||
} |
||||
|
||||
func (i *IndexReader) DumpAll() chan interface{} { |
||||
rv := make(chan interface{}) |
||||
go func() { |
||||
defer close(rv) |
||||
dumpRange(i.kvreader, rv, nil, nil) |
||||
}() |
||||
return rv |
||||
} |
||||
|
||||
func (i *IndexReader) DumpFields() chan interface{} { |
||||
rv := make(chan interface{}) |
||||
go func() { |
||||
defer close(rv) |
||||
dumpPrefix(i.kvreader, rv, []byte{'f'}) |
||||
}() |
||||
return rv |
||||
} |
||||
|
||||
// keyset is a sort.Interface over byte-slice keys, ordered
// lexicographically.
type keyset [][]byte

func (k keyset) Len() int      { return len(k) }
func (k keyset) Swap(i, j int) { k[i], k[j] = k[j], k[i] }
func (k keyset) Less(i, j int) bool {
	return bytes.Compare(k[i], k[j]) < 0
}
||||
|
||||
// DumpDoc returns all rows in the index related to this doc id
|
||||
func (i *IndexReader) DumpDoc(id string) chan interface{} { |
||||
idBytes := []byte(id) |
||||
|
||||
rv := make(chan interface{}) |
||||
|
||||
go func() { |
||||
defer close(rv) |
||||
|
||||
back, err := backIndexRowForDoc(i.kvreader, []byte(id)) |
||||
if err != nil { |
||||
rv <- err |
||||
return |
||||
} |
||||
|
||||
// no such doc
|
||||
if back == nil { |
||||
return |
||||
} |
||||
// build sorted list of term keys
|
||||
keys := make(keyset, 0) |
||||
for _, entry := range back.termEntries { |
||||
tfr := NewTermFrequencyRow([]byte(*entry.Term), uint16(*entry.Field), idBytes, 0, 0) |
||||
key := tfr.Key() |
||||
keys = append(keys, key) |
||||
} |
||||
sort.Sort(keys) |
||||
|
||||
// first add all the stored rows
|
||||
storedRowPrefix := NewStoredRow(idBytes, 0, []uint64{}, 'x', []byte{}).ScanPrefixForDoc() |
||||
dumpPrefix(i.kvreader, rv, storedRowPrefix) |
||||
|
||||
// now walk term keys in order and add them as well
|
||||
if len(keys) > 0 { |
||||
it := i.kvreader.RangeIterator(keys[0], nil) |
||||
defer func() { |
||||
cerr := it.Close() |
||||
if cerr != nil { |
||||
rv <- cerr |
||||
} |
||||
}() |
||||
|
||||
for _, key := range keys { |
||||
it.Seek(key) |
||||
rkey, rval, valid := it.Current() |
||||
if !valid { |
||||
break |
||||
} |
||||
rck := make([]byte, len(rkey)) |
||||
copy(rck, key) |
||||
rcv := make([]byte, len(rval)) |
||||
copy(rcv, rval) |
||||
row, err := ParseFromKeyValue(rck, rcv) |
||||
if err != nil { |
||||
rv <- err |
||||
return |
||||
} |
||||
rv <- row |
||||
} |
||||
} |
||||
}() |
||||
|
||||
return rv |
||||
} |
@ -0,0 +1,78 @@ |
||||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package upsidedown |
||||
|
||||
import ( |
||||
"fmt" |
||||
|
||||
"github.com/blevesearch/bleve/index" |
||||
"github.com/blevesearch/bleve/index/store" |
||||
) |
||||
|
||||
// UpsideDownCouchFieldDict iterates the term dictionary rows for a
// single field over a bounded key range.
type UpsideDownCouchFieldDict struct {
	indexReader *IndexReader
	iterator    store.KVIterator
	dictRow     *DictionaryRow   // pre-allocated row, reused on every Next
	dictEntry   *index.DictEntry // pre-allocated entry, reused on every Next
	field       uint16
}
||||
|
||||
func newUpsideDownCouchFieldDict(indexReader *IndexReader, field uint16, startTerm, endTerm []byte) (*UpsideDownCouchFieldDict, error) { |
||||
|
||||
startKey := NewDictionaryRow(startTerm, field, 0).Key() |
||||
if endTerm == nil { |
||||
endTerm = []byte{ByteSeparator} |
||||
} else { |
||||
endTerm = incrementBytes(endTerm) |
||||
} |
||||
endKey := NewDictionaryRow(endTerm, field, 0).Key() |
||||
|
||||
it := indexReader.kvreader.RangeIterator(startKey, endKey) |
||||
|
||||
return &UpsideDownCouchFieldDict{ |
||||
indexReader: indexReader, |
||||
iterator: it, |
||||
dictRow: &DictionaryRow{}, // Pre-alloced, reused row.
|
||||
dictEntry: &index.DictEntry{}, // Pre-alloced, reused entry.
|
||||
field: field, |
||||
}, nil |
||||
|
||||
} |
||||
|
||||
func (r *UpsideDownCouchFieldDict) Next() (*index.DictEntry, error) { |
||||
key, val, valid := r.iterator.Current() |
||||
if !valid { |
||||
return nil, nil |
||||
} |
||||
|
||||
err := r.dictRow.parseDictionaryK(key) |
||||
if err != nil { |
||||
return nil, fmt.Errorf("unexpected error parsing dictionary row key: %v", err) |
||||
} |
||||
err = r.dictRow.parseDictionaryV(val) |
||||
if err != nil { |
||||
return nil, fmt.Errorf("unexpected error parsing dictionary row val: %v", err) |
||||
} |
||||
r.dictEntry.Term = string(r.dictRow.term) |
||||
r.dictEntry.Count = r.dictRow.count |
||||
// advance the iterator to the next term
|
||||
r.iterator.Next() |
||||
return r.dictEntry, nil |
||||
|
||||
} |
||||
|
||||
func (r *UpsideDownCouchFieldDict) Close() error { |
||||
return r.iterator.Close() |
||||
} |
@ -0,0 +1,189 @@ |
||||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package upsidedown |
||||
|
||||
import ( |
||||
"github.com/blevesearch/bleve/document" |
||||
"github.com/blevesearch/bleve/index" |
||||
"github.com/blevesearch/bleve/index/store" |
||||
) |
||||
|
||||
// IndexReader provides a read-only, point-in-time view of an
// UpsideDownCouch index backed by a single KV reader.
type IndexReader struct {
	index    *UpsideDownCouch
	kvreader store.KVReader
	docCount uint64 // doc count captured when the reader was opened
}
||||
|
||||
func (i *IndexReader) TermFieldReader(term []byte, fieldName string, includeFreq, includeNorm, includeTermVectors bool) (index.TermFieldReader, error) { |
||||
fieldIndex, fieldExists := i.index.fieldCache.FieldNamed(fieldName, false) |
||||
if fieldExists { |
||||
return newUpsideDownCouchTermFieldReader(i, term, uint16(fieldIndex), includeFreq, includeNorm, includeTermVectors) |
||||
} |
||||
return newUpsideDownCouchTermFieldReader(i, []byte{ByteSeparator}, ^uint16(0), includeFreq, includeNorm, includeTermVectors) |
||||
} |
||||
|
||||
func (i *IndexReader) FieldDict(fieldName string) (index.FieldDict, error) { |
||||
return i.FieldDictRange(fieldName, nil, nil) |
||||
} |
||||
|
||||
func (i *IndexReader) FieldDictRange(fieldName string, startTerm []byte, endTerm []byte) (index.FieldDict, error) { |
||||
fieldIndex, fieldExists := i.index.fieldCache.FieldNamed(fieldName, false) |
||||
if fieldExists { |
||||
return newUpsideDownCouchFieldDict(i, uint16(fieldIndex), startTerm, endTerm) |
||||
} |
||||
return newUpsideDownCouchFieldDict(i, ^uint16(0), []byte{ByteSeparator}, []byte{}) |
||||
} |
||||
|
||||
func (i *IndexReader) FieldDictPrefix(fieldName string, termPrefix []byte) (index.FieldDict, error) { |
||||
return i.FieldDictRange(fieldName, termPrefix, termPrefix) |
||||
} |
||||
|
||||
func (i *IndexReader) DocIDReaderAll() (index.DocIDReader, error) { |
||||
return newUpsideDownCouchDocIDReader(i) |
||||
} |
||||
|
||||
func (i *IndexReader) DocIDReaderOnly(ids []string) (index.DocIDReader, error) { |
||||
return newUpsideDownCouchDocIDReaderOnly(i, ids) |
||||
} |
||||
|
||||
// Document reassembles the stored fields for the given external id
// into a *document.Document. Returns a nil doc and nil error when the
// id is unknown.
func (i *IndexReader) Document(id string) (doc *document.Document, err error) {
	// first hit the back index to confirm doc exists
	var backIndexRow *BackIndexRow
	backIndexRow, err = backIndexRowForDoc(i.kvreader, []byte(id))
	if err != nil {
		return
	}
	if backIndexRow == nil {
		// unknown id: nil doc, nil error
		return
	}
	doc = document.NewDocument(id)
	storedRow := NewStoredRow([]byte(id), 0, []uint64{}, 'x', nil)
	storedRowScanPrefix := storedRow.ScanPrefixForDoc()
	it := i.kvreader.PrefixIterator(storedRowScanPrefix)
	defer func() {
		// surface Close errors via the named return, without
		// clobbering an earlier error
		if cerr := it.Close(); err == nil && cerr != nil {
			err = cerr
		}
	}()
	key, val, valid := it.Current()
	for valid {
		// val is only valid until the next iterator call; copy it
		safeVal := make([]byte, len(val))
		copy(safeVal, val)
		var row *StoredRow
		row, err = NewStoredRowKV(key, safeVal)
		if err != nil {
			doc = nil
			return
		}
		if row != nil {
			fieldName := i.index.fieldCache.FieldIndexed(row.field)
			field := decodeFieldType(row.typ, fieldName, row.arrayPositions, row.value)
			if field != nil {
				doc.AddField(field)
			}
		}

		it.Next()
		key, val, valid = it.Current()
	}
	return
}
||||
|
||||
func (i *IndexReader) DocumentFieldTerms(id index.IndexInternalID, fields []string) (index.FieldTerms, error) { |
||||
back, err := backIndexRowForDoc(i.kvreader, id) |
||||
if err != nil { |
||||
return nil, err |
||||
} |
||||
if back == nil { |
||||
return nil, nil |
||||
} |
||||
rv := make(index.FieldTerms, len(fields)) |
||||
fieldsMap := make(map[uint16]string, len(fields)) |
||||
for _, f := range fields { |
||||
id, ok := i.index.fieldCache.FieldNamed(f, false) |
||||
if ok { |
||||
fieldsMap[id] = f |
||||
} |
||||
} |
||||
for _, entry := range back.termEntries { |
||||
if field, ok := fieldsMap[uint16(*entry.Field)]; ok { |
||||
rv[field] = append(rv[field], *entry.Term) |
||||
} |
||||
} |
||||
return rv, nil |
||||
} |
||||
|
||||
func (i *IndexReader) Fields() (fields []string, err error) { |
||||
fields = make([]string, 0) |
||||
it := i.kvreader.PrefixIterator([]byte{'f'}) |
||||
defer func() { |
||||
if cerr := it.Close(); err == nil && cerr != nil { |
||||
err = cerr |
||||
} |
||||
}() |
||||
key, val, valid := it.Current() |
||||
for valid { |
||||
var row UpsideDownCouchRow |
||||
row, err = ParseFromKeyValue(key, val) |
||||
if err != nil { |
||||
fields = nil |
||||
return |
||||
} |
||||
if row != nil { |
||||
fieldRow, ok := row.(*FieldRow) |
||||
if ok { |
||||
fields = append(fields, fieldRow.name) |
||||
} |
||||
} |
||||
|
||||
it.Next() |
||||
key, val, valid = it.Current() |
||||
} |
||||
return |
||||
} |
||||
|
||||
func (i *IndexReader) GetInternal(key []byte) ([]byte, error) { |
||||
internalRow := NewInternalRow(key, nil) |
||||
return i.kvreader.Get(internalRow.Key()) |
||||
} |
||||
|
||||
func (i *IndexReader) DocCount() (uint64, error) { |
||||
return i.docCount, nil |
||||
} |
||||
|
||||
func (i *IndexReader) Close() error { |
||||
return i.kvreader.Close() |
||||
} |
||||
|
||||
func (i *IndexReader) ExternalID(id index.IndexInternalID) (string, error) { |
||||
return string(id), nil |
||||
} |
||||
|
||||
func (i *IndexReader) InternalID(id string) (index.IndexInternalID, error) { |
||||
return index.IndexInternalID(id), nil |
||||
} |
||||
|
||||
// incrementBytes returns a copy of in incremented by one when treated
// as a big-endian unsigned integer; the carry propagates toward the
// front and all-0xff input wraps to all zeros.
func incrementBytes(in []byte) []byte {
	out := append([]byte(nil), in...)
	for pos := len(out) - 1; pos >= 0; pos-- {
		out[pos]++
		if out[pos] != 0 {
			// no carry needed, done
			break
		}
	}
	return out
}
@ -0,0 +1,325 @@ |
||||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package upsidedown |
||||
|
||||
import ( |
||||
"bytes" |
||||
"sort" |
||||
"sync/atomic" |
||||
|
||||
"github.com/blevesearch/bleve/index" |
||||
"github.com/blevesearch/bleve/index/store" |
||||
) |
||||
|
||||
// UpsideDownCouchTermFieldReader iterates the term frequency rows for
// a single (term, field) pair.
type UpsideDownCouchTermFieldReader struct {
	count       uint64           // doc count for the term, from the dictionary row
	indexReader *IndexReader     // may be nil; Close guards against it
	iterator    store.KVIterator // nil when the term has no dictionary entry
	term        []byte
	tfrNext     *TermFrequencyRow // reused row; also acts as an init flag in Next
	keyBuf      []byte            // scratch buffer reused by Advance
	field       uint16
}
||||
|
||||
func newUpsideDownCouchTermFieldReader(indexReader *IndexReader, term []byte, field uint16, includeFreq, includeNorm, includeTermVectors bool) (*UpsideDownCouchTermFieldReader, error) { |
||||
dictionaryRow := NewDictionaryRow(term, field, 0) |
||||
val, err := indexReader.kvreader.Get(dictionaryRow.Key()) |
||||
if err != nil { |
||||
return nil, err |
||||
} |
||||
if val == nil { |
||||
atomic.AddUint64(&indexReader.index.stats.termSearchersStarted, uint64(1)) |
||||
return &UpsideDownCouchTermFieldReader{ |
||||
count: 0, |
||||
term: term, |
||||
tfrNext: &TermFrequencyRow{}, |
||||
field: field, |
||||
}, nil |
||||
} |
||||
|
||||
err = dictionaryRow.parseDictionaryV(val) |
||||
if err != nil { |
||||
return nil, err |
||||
} |
||||
|
||||
tfr := NewTermFrequencyRow(term, field, []byte{}, 0, 0) |
||||
it := indexReader.kvreader.PrefixIterator(tfr.Key()) |
||||
|
||||
atomic.AddUint64(&indexReader.index.stats.termSearchersStarted, uint64(1)) |
||||
return &UpsideDownCouchTermFieldReader{ |
||||
indexReader: indexReader, |
||||
iterator: it, |
||||
count: dictionaryRow.count, |
||||
term: term, |
||||
field: field, |
||||
}, nil |
||||
} |
||||
|
||||
func (r *UpsideDownCouchTermFieldReader) Count() uint64 { |
||||
return r.count |
||||
} |
||||
|
||||
// Next returns the next posting for this term/field, reusing
// preAlloced when provided. Returns nil, nil when the postings are
// exhausted, or immediately when this reader has no iterator (missing
// term).
func (r *UpsideDownCouchTermFieldReader) Next(preAlloced *index.TermFieldDoc) (*index.TermFieldDoc, error) {
	if r.iterator != nil {
		// We treat tfrNext also like an initialization flag, which
		// tells us whether we need to invoke the underlying
		// iterator.Next(). The first time, don't call iterator.Next().
		if r.tfrNext != nil {
			r.iterator.Next()
		} else {
			r.tfrNext = &TermFrequencyRow{}
		}
		key, val, valid := r.iterator.Current()
		if valid {
			tfr := r.tfrNext
			err := tfr.parseKDoc(key, r.term)
			if err != nil {
				return nil, err
			}
			err = tfr.parseV(val)
			if err != nil {
				return nil, err
			}
			rv := preAlloced
			if rv == nil {
				rv = &index.TermFieldDoc{}
			}
			// copy the doc id out of the iterator-owned key bytes
			rv.ID = append(rv.ID, tfr.doc...)
			rv.Freq = tfr.freq
			rv.Norm = float64(tfr.norm)
			if tfr.vectors != nil {
				rv.Vectors = r.indexReader.index.termFieldVectorsFromTermVectors(tfr.vectors)
			}
			return rv, nil
		}
	}
	return nil, nil
}
||||
|
||||
// Advance seeks directly to the posting for docID (or the first
// posting after it), reusing preAlloced when provided. Returns
// nil, nil when no posting at or after docID exists, or when this
// reader has no iterator (missing term).
func (r *UpsideDownCouchTermFieldReader) Advance(docID index.IndexInternalID, preAlloced *index.TermFieldDoc) (rv *index.TermFieldDoc, err error) {
	if r.iterator != nil {
		if r.tfrNext == nil {
			r.tfrNext = &TermFrequencyRow{}
		}
		tfr := InitTermFrequencyRow(r.tfrNext, r.term, r.field, docID, 0, 0)
		// build the seek key into the reusable scratch buffer
		r.keyBuf, err = tfr.KeyAppendTo(r.keyBuf[:0])
		if err != nil {
			return nil, err
		}
		r.iterator.Seek(r.keyBuf)
		key, val, valid := r.iterator.Current()
		if valid {
			err := tfr.parseKDoc(key, r.term)
			if err != nil {
				return nil, err
			}
			err = tfr.parseV(val)
			if err != nil {
				return nil, err
			}
			rv = preAlloced
			if rv == nil {
				rv = &index.TermFieldDoc{}
			}
			// copy the doc id out of the iterator-owned key bytes
			rv.ID = append(rv.ID, tfr.doc...)
			rv.Freq = tfr.freq
			rv.Norm = float64(tfr.norm)
			if tfr.vectors != nil {
				rv.Vectors = r.indexReader.index.termFieldVectorsFromTermVectors(tfr.vectors)
			}
			return rv, nil
		}
	}
	return nil, nil
}
||||
|
||||
func (r *UpsideDownCouchTermFieldReader) Close() error { |
||||
if r.indexReader != nil { |
||||
atomic.AddUint64(&r.indexReader.index.stats.termSearchersFinished, uint64(1)) |
||||
} |
||||
if r.iterator != nil { |
||||
return r.iterator.Close() |
||||
} |
||||
return nil |
||||
} |
||||
|
||||
// UpsideDownCouchDocIDReader iterates document ids via their back
// index rows, optionally restricted to an explicit sorted id list.
type UpsideDownCouchDocIDReader struct {
	indexReader *IndexReader
	iterator    store.KVIterator
	only        []string // sorted external ids to restrict to (onlyMode)
	onlyPos     int      // cursor into only
	onlyMode    bool     // true when restricted to the only list
}
||||
|
||||
func newUpsideDownCouchDocIDReader(indexReader *IndexReader) (*UpsideDownCouchDocIDReader, error) { |
||||
|
||||
startBytes := []byte{0x0} |
||||
endBytes := []byte{0xff} |
||||
|
||||
bisr := NewBackIndexRow(startBytes, nil, nil) |
||||
bier := NewBackIndexRow(endBytes, nil, nil) |
||||
it := indexReader.kvreader.RangeIterator(bisr.Key(), bier.Key()) |
||||
|
||||
return &UpsideDownCouchDocIDReader{ |
||||
indexReader: indexReader, |
||||
iterator: it, |
||||
}, nil |
||||
} |
||||
|
||||
func newUpsideDownCouchDocIDReaderOnly(indexReader *IndexReader, ids []string) (*UpsideDownCouchDocIDReader, error) { |
||||
// ensure ids are sorted
|
||||
sort.Strings(ids) |
||||
startBytes := []byte{0x0} |
||||
if len(ids) > 0 { |
||||
startBytes = []byte(ids[0]) |
||||
} |
||||
endBytes := []byte{0xff} |
||||
if len(ids) > 0 { |
||||
endBytes = incrementBytes([]byte(ids[len(ids)-1])) |
||||
} |
||||
bisr := NewBackIndexRow(startBytes, nil, nil) |
||||
bier := NewBackIndexRow(endBytes, nil, nil) |
||||
it := indexReader.kvreader.RangeIterator(bisr.Key(), bier.Key()) |
||||
|
||||
return &UpsideDownCouchDocIDReader{ |
||||
indexReader: indexReader, |
||||
iterator: it, |
||||
only: ids, |
||||
onlyMode: true, |
||||
}, nil |
||||
} |
||||
|
||||
// Next returns the next document id, or nil when exhausted. In
// onlyMode, requested ids that are absent from the index are skipped
// by seeking ahead through the only list.
func (r *UpsideDownCouchDocIDReader) Next() (index.IndexInternalID, error) {
	key, val, valid := r.iterator.Current()

	if r.onlyMode {
		var rv index.IndexInternalID
		for valid && r.onlyPos < len(r.only) {
			br, err := NewBackIndexRowKV(key, val)
			if err != nil {
				return nil, err
			}
			if !bytes.Equal(br.doc, []byte(r.only[r.onlyPos])) {
				// iterator is not on the id we want; advance to the
				// next requested id and seek to its back index row
				ok := r.nextOnly()
				if !ok {
					return nil, nil
				}
				r.iterator.Seek(NewBackIndexRow([]byte(r.only[r.onlyPos]), nil, nil).Key())
				key, val, valid = r.iterator.Current()
				continue
			} else {
				// found the requested id; copy it out of the
				// iterator-owned bytes
				rv = append([]byte(nil), br.doc...)
				break
			}
		}
		if valid && r.onlyPos < len(r.only) {
			// pre-position on the next requested id for the next call
			ok := r.nextOnly()
			if ok {
				r.iterator.Seek(NewBackIndexRow([]byte(r.only[r.onlyPos]), nil, nil).Key())
			}
			return rv, nil
		}

	} else {
		if valid {
			br, err := NewBackIndexRowKV(key, val)
			if err != nil {
				return nil, err
			}
			// copy the id out of the iterator-owned bytes
			rv := append([]byte(nil), br.doc...)
			r.iterator.Next()
			return rv, nil
		}
	}
	return nil, nil
}
||||
|
||||
// Advance moves the reader to the first document ID >= docID and returns
// it, or nil when no such ID exists. In onlyMode the search is performed
// over the r.only list; otherwise the iterator is seeked directly.
func (r *UpsideDownCouchDocIDReader) Advance(docID index.IndexInternalID) (index.IndexInternalID, error) {

	if r.onlyMode {
		// binary search the sorted candidate list for the first id >= docID
		r.onlyPos = sort.SearchStrings(r.only, string(docID))
		if r.onlyPos >= len(r.only) {
			// advanced to key after our last only key
			return nil, nil
		}
		r.iterator.Seek(NewBackIndexRow([]byte(r.only[r.onlyPos]), nil, nil).Key())
		key, val, valid := r.iterator.Current()

		var rv index.IndexInternalID
		for valid && r.onlyPos < len(r.only) {
			br, err := NewBackIndexRowKV(key, val)
			if err != nil {
				return nil, err
			}
			if !bytes.Equal(br.doc, []byte(r.only[r.onlyPos])) {
				// the only key we seek'd to didn't exist
				// now look for the closest key that did exist in only
				r.onlyPos = sort.SearchStrings(r.only, string(br.doc))
				if r.onlyPos >= len(r.only) {
					// advanced to key after our last only key
					return nil, nil
				}
				// now seek to this new only key
				r.iterator.Seek(NewBackIndexRow([]byte(r.only[r.onlyPos]), nil, nil).Key())
				key, val, valid = r.iterator.Current()
				continue
			} else {
				// match: copy the doc ID out of the iterator's buffer
				rv = append([]byte(nil), br.doc...)
				break
			}
		}
		if valid && r.onlyPos < len(r.only) {
			// position the iterator at the next candidate before returning,
			// mirroring the behavior of Next()
			ok := r.nextOnly()
			if ok {
				r.iterator.Seek(NewBackIndexRow([]byte(r.only[r.onlyPos]), nil, nil).Key())
			}
			return rv, nil
		}
	} else {
		// plain mode: seek to the back-index row for docID and return the
		// doc ID actually found there (which may be a later one)
		bir := NewBackIndexRow(docID, nil, nil)
		r.iterator.Seek(bir.Key())
		key, val, valid := r.iterator.Current()
		if valid {
			br, err := NewBackIndexRowKV(key, val)
			if err != nil {
				return nil, err
			}
			rv := append([]byte(nil), br.doc...)
			r.iterator.Next()
			return rv, nil
		}
	}
	return nil, nil
}
||||
|
||||
// Close releases the underlying KV range iterator.
func (r *UpsideDownCouchDocIDReader) Close() error {
	return r.iterator.Close()
}
||||
|
||||
// move the r.only pos forward one, skipping duplicates
|
||||
// return true if there is more data, or false if we got to the end of the list
|
||||
func (r *UpsideDownCouchDocIDReader) nextOnly() bool { |
||||
|
||||
// advance 1 position, until we see a different key
|
||||
// it's already sorted, so this skips duplicates
|
||||
start := r.onlyPos |
||||
r.onlyPos++ |
||||
for r.onlyPos < len(r.only) && r.only[r.onlyPos] == r.only[start] { |
||||
start = r.onlyPos |
||||
r.onlyPos++ |
||||
} |
||||
// inidicate if we got to the end of the list
|
||||
return r.onlyPos < len(r.only) |
||||
} |
@ -0,0 +1,853 @@ |
||||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package upsidedown |
||||
|
||||
import ( |
||||
"bytes" |
||||
"encoding/binary" |
||||
"fmt" |
||||
"io" |
||||
"math" |
||||
|
||||
"github.com/golang/protobuf/proto" |
||||
) |
||||
|
||||
const ByteSeparator byte = 0xff |
||||
|
||||
type UpsideDownCouchRowStream chan UpsideDownCouchRow |
||||
|
||||
type UpsideDownCouchRow interface { |
||||
KeySize() int |
||||
KeyTo([]byte) (int, error) |
||||
Key() []byte |
||||
Value() []byte |
||||
ValueSize() int |
||||
ValueTo([]byte) (int, error) |
||||
} |
||||
|
||||
func ParseFromKeyValue(key, value []byte) (UpsideDownCouchRow, error) { |
||||
if len(key) > 0 { |
||||
switch key[0] { |
||||
case 'v': |
||||
return NewVersionRowKV(key, value) |
||||
case 'f': |
||||
return NewFieldRowKV(key, value) |
||||
case 'd': |
||||
return NewDictionaryRowKV(key, value) |
||||
case 't': |
||||
return NewTermFrequencyRowKV(key, value) |
||||
case 'b': |
||||
return NewBackIndexRowKV(key, value) |
||||
case 's': |
||||
return NewStoredRowKV(key, value) |
||||
case 'i': |
||||
return NewInternalRowKV(key, value) |
||||
} |
||||
return nil, fmt.Errorf("Unknown field type '%s'", string(key[0])) |
||||
} |
||||
return nil, fmt.Errorf("Invalid empty key") |
||||
} |
||||
|
||||
// VERSION

// VersionRow records the on-disk index format version under the fixed
// single-byte key 'v'.
type VersionRow struct {
	version uint8
}

// Key returns the fixed key for the version row.
func (v *VersionRow) Key() []byte {
	return []byte{'v'}
}

// KeySize reports the encoded key length.
func (v *VersionRow) KeySize() int {
	return 1
}

// KeyTo writes the key into buf and returns the number of bytes written.
func (v *VersionRow) KeyTo(buf []byte) (int, error) {
	buf[0] = 'v'
	return 1, nil
}

// Value returns the encoded value: the version as a single byte.
func (v *VersionRow) Value() []byte {
	return []byte{byte(v.version)}
}

// ValueSize reports the encoded value length.
func (v *VersionRow) ValueSize() int {
	return 1
}

// ValueTo writes the value into buf and returns the number of bytes written.
func (v *VersionRow) ValueTo(buf []byte) (int, error) {
	buf[0] = v.version
	return 1, nil
}

// String implements fmt.Stringer for debugging output.
func (v *VersionRow) String() string {
	return fmt.Sprintf("Version: %d", v.version)
}

// NewVersionRow constructs a VersionRow for the given version.
func NewVersionRow(version uint8) *VersionRow {
	return &VersionRow{version: version}
}

// NewVersionRowKV decodes a VersionRow from a raw KV pair; the key is
// ignored, the value holds the version byte.
func NewVersionRowKV(key, value []byte) (*VersionRow, error) {
	var rv VersionRow
	rdr := bytes.NewBuffer(value)
	if err := binary.Read(rdr, binary.LittleEndian, &rv.version); err != nil {
		return nil, err
	}
	return &rv, nil
}
||||
|
||||
// INTERNAL STORAGE

// InternalRow stores arbitrary internal data under the 'i' key prefix.
type InternalRow struct {
	key []byte
	val []byte
}

// Key returns the encoded key: 'i' followed by the raw key bytes.
func (i *InternalRow) Key() []byte {
	buf := make([]byte, i.KeySize())
	n, _ := i.KeyTo(buf)
	return buf[:n]
}

// KeySize reports the encoded key length.
func (i *InternalRow) KeySize() int {
	return 1 + len(i.key)
}

// KeyTo writes the encoded key into buf and returns the bytes written.
func (i *InternalRow) KeyTo(buf []byte) (int, error) {
	buf[0] = 'i'
	n := copy(buf[1:], i.key)
	return n + 1, nil
}

// Value returns the stored value bytes unmodified.
func (i *InternalRow) Value() []byte {
	return i.val
}

// ValueSize reports the encoded value length.
func (i *InternalRow) ValueSize() int {
	return len(i.val)
}

// ValueTo copies the value into buf and returns the bytes written.
func (i *InternalRow) ValueTo(buf []byte) (int, error) {
	return copy(buf, i.val), nil
}

// String implements fmt.Stringer for debugging output.
func (i *InternalRow) String() string {
	return fmt.Sprintf("InternalStore - Key: %s (% x) Val: %s (% x)", i.key, i.key, i.val, i.val)
}

// NewInternalRow constructs an InternalRow from a raw key and value.
func NewInternalRow(key, val []byte) *InternalRow {
	return &InternalRow{key: key, val: val}
}

// NewInternalRowKV decodes an InternalRow from a raw KV pair, stripping
// the leading 'i' type byte from the key.
func NewInternalRowKV(key, value []byte) (*InternalRow, error) {
	return &InternalRow{key: key[1:], val: value}, nil
}
||||
|
||||
// FIELD definition
|
||||
|
||||
type FieldRow struct { |
||||
index uint16 |
||||
name string |
||||
} |
||||
|
||||
func (f *FieldRow) Key() []byte { |
||||
buf := make([]byte, f.KeySize()) |
||||
size, _ := f.KeyTo(buf) |
||||
return buf[:size] |
||||
} |
||||
|
||||
func (f *FieldRow) KeySize() int { |
||||
return 3 |
||||
} |
||||
|
||||
func (f *FieldRow) KeyTo(buf []byte) (int, error) { |
||||
buf[0] = 'f' |
||||
binary.LittleEndian.PutUint16(buf[1:3], f.index) |
||||
return 3, nil |
||||
} |
||||
|
||||
func (f *FieldRow) Value() []byte { |
||||
return append([]byte(f.name), ByteSeparator) |
||||
} |
||||
|
||||
func (f *FieldRow) ValueSize() int { |
||||
return len(f.name) + 1 |
||||
} |
||||
|
||||
func (f *FieldRow) ValueTo(buf []byte) (int, error) { |
||||
size := copy(buf, f.name) |
||||
buf[size] = ByteSeparator |
||||
return size + 1, nil |
||||
} |
||||
|
||||
func (f *FieldRow) String() string { |
||||
return fmt.Sprintf("Field: %d Name: %s", f.index, f.name) |
||||
} |
||||
|
||||
func NewFieldRow(index uint16, name string) *FieldRow { |
||||
return &FieldRow{ |
||||
index: index, |
||||
name: name, |
||||
} |
||||
} |
||||
|
||||
func NewFieldRowKV(key, value []byte) (*FieldRow, error) { |
||||
rv := FieldRow{} |
||||
|
||||
buf := bytes.NewBuffer(key) |
||||
_, err := buf.ReadByte() // type
|
||||
if err != nil { |
||||
return nil, err |
||||
} |
||||
err = binary.Read(buf, binary.LittleEndian, &rv.index) |
||||
if err != nil { |
||||
return nil, err |
||||
} |
||||
|
||||
buf = bytes.NewBuffer(value) |
||||
rv.name, err = buf.ReadString(ByteSeparator) |
||||
if err != nil { |
||||
return nil, err |
||||
} |
||||
rv.name = rv.name[:len(rv.name)-1] // trim off separator byte
|
||||
|
||||
return &rv, nil |
||||
} |
||||
|
||||
// DICTIONARY

// DictionaryRowMaxValueSize bounds the encoded value: a single uvarint.
const DictionaryRowMaxValueSize = binary.MaxVarintLen64

// DictionaryRow tracks, per field, how many documents contain a term.
type DictionaryRow struct {
	term  []byte
	count uint64
	field uint16
}

// Key returns the encoded key: 'd' + little-endian field + term bytes.
func (dr *DictionaryRow) Key() []byte {
	buf := make([]byte, dr.KeySize())
	n, _ := dr.KeyTo(buf)
	return buf[:n]
}

// KeySize reports the encoded key length.
func (dr *DictionaryRow) KeySize() int {
	return len(dr.term) + 3
}

// KeyTo writes the encoded key into buf and returns the bytes written.
func (dr *DictionaryRow) KeyTo(buf []byte) (int, error) {
	buf[0] = 'd'
	binary.LittleEndian.PutUint16(buf[1:3], dr.field)
	n := copy(buf[3:], dr.term)
	return 3 + n, nil
}

// Value returns the encoded value: the count as a uvarint.
func (dr *DictionaryRow) Value() []byte {
	buf := make([]byte, dr.ValueSize())
	n, _ := dr.ValueTo(buf)
	return buf[:n]
}

// ValueSize reports the maximum encoded value length (uvarint bound).
func (dr *DictionaryRow) ValueSize() int {
	return DictionaryRowMaxValueSize
}

// ValueTo writes the uvarint count into buf and returns the bytes written.
func (dr *DictionaryRow) ValueTo(buf []byte) (int, error) {
	return binary.PutUvarint(buf, dr.count), nil
}

// String implements fmt.Stringer for debugging output.
func (dr *DictionaryRow) String() string {
	return fmt.Sprintf("Dictionary Term: `%s` Field: %d Count: %d ", string(dr.term), dr.field, dr.count)
}

// NewDictionaryRow constructs a DictionaryRow from its parts.
func NewDictionaryRow(term []byte, field uint16, count uint64) *DictionaryRow {
	return &DictionaryRow{
		term:  term,
		field: field,
		count: count,
	}
}

// NewDictionaryRowKV decodes a DictionaryRow from a raw KV pair.
func NewDictionaryRowKV(key, value []byte) (*DictionaryRow, error) {
	rv, err := NewDictionaryRowK(key)
	if err != nil {
		return nil, err
	}
	if err := rv.parseDictionaryV(value); err != nil {
		return nil, err
	}
	return rv, nil
}

// NewDictionaryRowK decodes a DictionaryRow from a key alone (count zero).
func NewDictionaryRowK(key []byte) (*DictionaryRow, error) {
	rv := &DictionaryRow{}
	if err := rv.parseDictionaryK(key); err != nil {
		return nil, err
	}
	return rv, nil
}

// parseDictionaryK extracts the field and term from an encoded key,
// reusing dr.term's existing storage when possible.
func (dr *DictionaryRow) parseDictionaryK(key []byte) error {
	dr.field = binary.LittleEndian.Uint16(key[1:3])
	if dr.term != nil {
		dr.term = dr.term[:0]
	}
	dr.term = append(dr.term, key[3:]...)
	return nil
}

// parseDictionaryV extracts the uvarint count from an encoded value.
func (dr *DictionaryRow) parseDictionaryV(value []byte) error {
	count, n := binary.Uvarint(value)
	if n <= 0 {
		return fmt.Errorf("DictionaryRow parse Uvarint error, nread: %d", n)
	}
	dr.count = count
	return nil
}
||||
|
||||
// TERM FIELD FREQUENCY

// TermVector records a single occurrence of a term within a document
// field: the field it occurred in, its position (pos) and start/end
// offsets as produced by analysis, and — for fields nested in arrays —
// the array positions identifying which element it came from.
type TermVector struct {
	field          uint16
	arrayPositions []uint64
	pos            uint64
	start          uint64
	end            uint64
}

// String implements fmt.Stringer for debugging output.
func (tv *TermVector) String() string {
	return fmt.Sprintf("Field: %d Pos: %d Start: %d End %d ArrayPositions: %#v", tv.field, tv.pos, tv.start, tv.end, tv.arrayPositions)
}

// TermFrequencyRow is a single posting: the frequency and norm of one
// term in one document field, plus its optional term vectors.
type TermFrequencyRow struct {
	term    []byte
	doc     []byte
	freq    uint64
	vectors []*TermVector
	norm    float32
	field   uint16
}
||||
|
||||
// Term returns the raw term bytes for this posting.
func (tfr *TermFrequencyRow) Term() []byte {
	return tfr.term
}

// Freq returns the term's frequency within the document field.
func (tfr *TermFrequencyRow) Freq() uint64 {
	return tfr.freq
}

// ScanPrefixForField returns the key prefix covering every term row of
// this row's field: 't' + little-endian field number.
func (tfr *TermFrequencyRow) ScanPrefixForField() []byte {
	buf := make([]byte, 3)
	buf[0] = 't'
	binary.LittleEndian.PutUint16(buf[1:3], tfr.field)
	return buf
}

// ScanPrefixForFieldTermPrefix returns the key prefix covering every term
// in this field that starts with tfr.term (no separator appended, so
// longer terms sharing the prefix are included).
func (tfr *TermFrequencyRow) ScanPrefixForFieldTermPrefix() []byte {
	buf := make([]byte, 3+len(tfr.term))
	buf[0] = 't'
	binary.LittleEndian.PutUint16(buf[1:3], tfr.field)
	copy(buf[3:], tfr.term)
	return buf
}

// ScanPrefixForFieldTerm returns the key prefix covering exactly this
// field/term pair: the term is terminated with ByteSeparator, so longer
// terms sharing the prefix are excluded.
func (tfr *TermFrequencyRow) ScanPrefixForFieldTerm() []byte {
	buf := make([]byte, 3+len(tfr.term)+1)
	buf[0] = 't'
	binary.LittleEndian.PutUint16(buf[1:3], tfr.field)
	termLen := copy(buf[3:], tfr.term)
	buf[3+termLen] = ByteSeparator
	return buf
}
||||
|
||||
// Key returns the full encoded posting key:
// 't' + field (2 bytes LE) + term + ByteSeparator + doc ID.
func (tfr *TermFrequencyRow) Key() []byte {
	buf := make([]byte, tfr.KeySize())
	size, _ := tfr.KeyTo(buf)
	return buf[:size]
}

// KeySize reports the encoded key length.
func (tfr *TermFrequencyRow) KeySize() int {
	return 3 + len(tfr.term) + 1 + len(tfr.doc)
}

// KeyTo writes the encoded key into buf and returns the bytes written.
func (tfr *TermFrequencyRow) KeyTo(buf []byte) (int, error) {
	buf[0] = 't'
	binary.LittleEndian.PutUint16(buf[1:3], tfr.field)
	termLen := copy(buf[3:], tfr.term)
	buf[3+termLen] = ByteSeparator
	docLen := copy(buf[3+termLen+1:], tfr.doc)
	return 3 + termLen + 1 + docLen, nil
}

// KeyAppendTo encodes the key into buf, reusing buf's storage when its
// capacity suffices and allocating a fresh slice otherwise.
func (tfr *TermFrequencyRow) KeyAppendTo(buf []byte) ([]byte, error) {
	keySize := tfr.KeySize()
	if cap(buf) < keySize {
		buf = make([]byte, keySize)
	}
	actualSize, err := tfr.KeyTo(buf[0:keySize])
	return buf[0:actualSize], err
}
||||
|
||||
func (tfr *TermFrequencyRow) DictionaryRowKey() []byte { |
||||
dr := NewDictionaryRow(tfr.term, tfr.field, 0) |
||||
return dr.Key() |
||||
} |
||||
|
||||
func (tfr *TermFrequencyRow) DictionaryRowKeySize() int { |
||||
dr := NewDictionaryRow(tfr.term, tfr.field, 0) |
||||
return dr.KeySize() |
||||
} |
||||
|
||||
func (tfr *TermFrequencyRow) DictionaryRowKeyTo(buf []byte) (int, error) { |
||||
dr := NewDictionaryRow(tfr.term, tfr.field, 0) |
||||
return dr.KeyTo(buf) |
||||
} |
||||
|
||||
// Value returns the encoded posting value: freq and norm as uvarints,
// followed by each term vector's fields as uvarints.
func (tfr *TermFrequencyRow) Value() []byte {
	buf := make([]byte, tfr.ValueSize())
	size, _ := tfr.ValueTo(buf)
	return buf[:size]
}

// ValueSize reports an upper bound on the encoded value length (each
// varint is budgeted at its maximum width), not the exact size.
func (tfr *TermFrequencyRow) ValueSize() int {
	bufLen := binary.MaxVarintLen64 + binary.MaxVarintLen64
	for _, vector := range tfr.vectors {
		bufLen += (binary.MaxVarintLen64 * 4) + (1+len(vector.arrayPositions))*binary.MaxVarintLen64
	}
	return bufLen
}

// ValueTo writes the encoded value into buf and returns the bytes
// actually used. Layout: freq, norm (float32 bits as uvarint), then per
// vector: field, pos, start, end, len(arrayPositions), arrayPositions...
func (tfr *TermFrequencyRow) ValueTo(buf []byte) (int, error) {
	used := binary.PutUvarint(buf[:binary.MaxVarintLen64], tfr.freq)

	// the norm is stored as the uvarint encoding of its float32 bit pattern
	normuint32 := math.Float32bits(tfr.norm)
	newbuf := buf[used : used+binary.MaxVarintLen64]
	used += binary.PutUvarint(newbuf, uint64(normuint32))

	for _, vector := range tfr.vectors {
		used += binary.PutUvarint(buf[used:used+binary.MaxVarintLen64], uint64(vector.field))
		used += binary.PutUvarint(buf[used:used+binary.MaxVarintLen64], vector.pos)
		used += binary.PutUvarint(buf[used:used+binary.MaxVarintLen64], vector.start)
		used += binary.PutUvarint(buf[used:used+binary.MaxVarintLen64], vector.end)
		used += binary.PutUvarint(buf[used:used+binary.MaxVarintLen64], uint64(len(vector.arrayPositions)))
		for _, arrayPosition := range vector.arrayPositions {
			used += binary.PutUvarint(buf[used:used+binary.MaxVarintLen64], arrayPosition)
		}
	}
	return used, nil
}
||||
|
||||
func (tfr *TermFrequencyRow) String() string { |
||||
return fmt.Sprintf("Term: `%s` Field: %d DocId: `%s` Frequency: %d Norm: %f Vectors: %v", string(tfr.term), tfr.field, string(tfr.doc), tfr.freq, tfr.norm, tfr.vectors) |
||||
} |
||||
|
||||
func InitTermFrequencyRow(tfr *TermFrequencyRow, term []byte, field uint16, docID []byte, freq uint64, norm float32) *TermFrequencyRow { |
||||
tfr.term = term |
||||
tfr.field = field |
||||
tfr.doc = docID |
||||
tfr.freq = freq |
||||
tfr.norm = norm |
||||
return tfr |
||||
} |
||||
|
||||
func NewTermFrequencyRow(term []byte, field uint16, docID []byte, freq uint64, norm float32) *TermFrequencyRow { |
||||
return &TermFrequencyRow{ |
||||
term: term, |
||||
field: field, |
||||
doc: docID, |
||||
freq: freq, |
||||
norm: norm, |
||||
} |
||||
} |
||||
|
||||
func NewTermFrequencyRowWithTermVectors(term []byte, field uint16, docID []byte, freq uint64, norm float32, vectors []*TermVector) *TermFrequencyRow { |
||||
return &TermFrequencyRow{ |
||||
term: term, |
||||
field: field, |
||||
doc: docID, |
||||
freq: freq, |
||||
norm: norm, |
||||
vectors: vectors, |
||||
} |
||||
} |
||||
|
||||
func NewTermFrequencyRowK(key []byte) (*TermFrequencyRow, error) { |
||||
rv := &TermFrequencyRow{} |
||||
err := rv.parseK(key) |
||||
if err != nil { |
||||
return nil, err |
||||
} |
||||
return rv, nil |
||||
} |
||||
|
||||
// parseK extracts field, term and doc ID from an encoded posting key.
// The term and doc slices alias the key's backing array; they are not
// copied.
func (tfr *TermFrequencyRow) parseK(key []byte) error {
	keyLen := len(key)
	if keyLen < 3 {
		return fmt.Errorf("invalid term frequency key, no valid field")
	}
	tfr.field = binary.LittleEndian.Uint16(key[1:3])

	// the term runs from byte 3 up to the ByteSeparator
	termEndPos := bytes.IndexByte(key[3:], ByteSeparator)
	if termEndPos < 0 {
		return fmt.Errorf("invalid term frequency key, no byte separator terminating term")
	}
	tfr.term = key[3 : 3+termEndPos]

	// the doc ID is everything after the separator and must be non-empty
	docLen := keyLen - (3 + termEndPos + 1)
	if docLen < 1 {
		return fmt.Errorf("invalid term frequency key, empty docid")
	}
	tfr.doc = key[3+termEndPos+1:]

	return nil
}

// parseKDoc extracts only the doc ID from an encoded posting key, given
// the already-known term (this skips the separator search in parseK).
func (tfr *TermFrequencyRow) parseKDoc(key []byte, term []byte) error {
	tfr.doc = key[3+len(term)+1:]
	if len(tfr.doc) <= 0 {
		return fmt.Errorf("invalid term frequency key, empty docid")
	}

	return nil
}
||||
|
||||
// parseV decodes the posting value: freq and norm, then zero or more
// term vectors, each encoded as field, pos, start, end, an array-position
// count, and that many array positions — all uvarints.
func (tfr *TermFrequencyRow) parseV(value []byte) error {
	var bytesRead int
	tfr.freq, bytesRead = binary.Uvarint(value)
	if bytesRead <= 0 {
		return fmt.Errorf("invalid term frequency value, invalid frequency")
	}
	currOffset := bytesRead

	// the norm was encoded as the uvarint of its float32 bit pattern
	var norm uint64
	norm, bytesRead = binary.Uvarint(value[currOffset:])
	if bytesRead <= 0 {
		return fmt.Errorf("invalid term frequency value, no norm")
	}
	currOffset += bytesRead

	tfr.norm = math.Float32frombits(uint32(norm))

	// read term vectors until the value is exhausted; a zero-byte read of
	// the leading field uvarint terminates the loop
	tfr.vectors = nil
	var field uint64
	field, bytesRead = binary.Uvarint(value[currOffset:])
	for bytesRead > 0 {
		currOffset += bytesRead
		tv := TermVector{}
		tv.field = uint16(field)
		// at this point we expect at least one term vector
		if tfr.vectors == nil {
			tfr.vectors = make([]*TermVector, 0)
		}

		tv.pos, bytesRead = binary.Uvarint(value[currOffset:])
		if bytesRead <= 0 {
			return fmt.Errorf("invalid term frequency value, vector contains no position")
		}
		currOffset += bytesRead

		tv.start, bytesRead = binary.Uvarint(value[currOffset:])
		if bytesRead <= 0 {
			return fmt.Errorf("invalid term frequency value, vector contains no start")
		}
		currOffset += bytesRead

		tv.end, bytesRead = binary.Uvarint(value[currOffset:])
		if bytesRead <= 0 {
			return fmt.Errorf("invalid term frequency value, vector contains no end")
		}
		currOffset += bytesRead

		var arrayPositionsLen uint64 = 0
		arrayPositionsLen, bytesRead = binary.Uvarint(value[currOffset:])
		if bytesRead <= 0 {
			return fmt.Errorf("invalid term frequency value, vector contains no arrayPositionLen")
		}
		currOffset += bytesRead

		if arrayPositionsLen > 0 {
			tv.arrayPositions = make([]uint64, arrayPositionsLen)
			for i := 0; uint64(i) < arrayPositionsLen; i++ {
				tv.arrayPositions[i], bytesRead = binary.Uvarint(value[currOffset:])
				if bytesRead <= 0 {
					return fmt.Errorf("invalid term frequency value, vector contains no arrayPosition of index %d", i)
				}
				currOffset += bytesRead
			}
		}

		tfr.vectors = append(tfr.vectors, &tv)
		// try to read next record (may not exist)
		field, bytesRead = binary.Uvarint(value[currOffset:])
	}
	// trailing bytes that failed to decode as a field uvarint are an error
	if len(value[currOffset:]) > 0 && bytesRead <= 0 {
		return fmt.Errorf("invalid term frequency value, vector field invalid")
	}

	return nil
}
||||
|
||||
func NewTermFrequencyRowKV(key, value []byte) (*TermFrequencyRow, error) { |
||||
rv, err := NewTermFrequencyRowK(key) |
||||
if err != nil { |
||||
return nil, err |
||||
} |
||||
|
||||
err = rv.parseV(value) |
||||
if err != nil { |
||||
return nil, err |
||||
} |
||||
return rv, nil |
||||
|
||||
} |
||||
|
||||
// BackIndexRow is the per-document reverse mapping: for one doc ID it
// lists every term entry and stored entry written for that document, so
// updates and deletes can locate all rows that need cleanup.
type BackIndexRow struct {
	doc           []byte
	termEntries   []*BackIndexTermEntry
	storedEntries []*BackIndexStoreEntry
}

// AllTermKeys returns the term-frequency row key for each term entry of
// this document. Returns nil on a nil receiver.
func (br *BackIndexRow) AllTermKeys() [][]byte {
	if br == nil {
		return nil
	}
	rv := make([][]byte, len(br.termEntries))
	for i, termEntry := range br.termEntries {
		// freq/norm are irrelevant here; only the key is used
		termRow := NewTermFrequencyRow([]byte(termEntry.GetTerm()), uint16(termEntry.GetField()), br.doc, 0, 0)
		rv[i] = termRow.Key()
	}
	return rv
}

// AllStoredKeys returns the stored row key for each stored entry of this
// document. Returns nil on a nil receiver.
func (br *BackIndexRow) AllStoredKeys() [][]byte {
	if br == nil {
		return nil
	}
	rv := make([][]byte, len(br.storedEntries))
	for i, storedEntry := range br.storedEntries {
		// typ 'x' and the empty value are placeholders; only the key is used
		storedRow := NewStoredRow(br.doc, uint16(storedEntry.GetField()), storedEntry.GetArrayPositions(), 'x', []byte{})
		rv[i] = storedRow.Key()
	}
	return rv
}
||||
|
||||
func (br *BackIndexRow) Key() []byte { |
||||
buf := make([]byte, br.KeySize()) |
||||
size, _ := br.KeyTo(buf) |
||||
return buf[:size] |
||||
} |
||||
|
||||
func (br *BackIndexRow) KeySize() int { |
||||
return len(br.doc) + 1 |
||||
} |
||||
|
||||
func (br *BackIndexRow) KeyTo(buf []byte) (int, error) { |
||||
buf[0] = 'b' |
||||
used := copy(buf[1:], br.doc) |
||||
return used + 1, nil |
||||
} |
||||
|
||||
func (br *BackIndexRow) Value() []byte { |
||||
buf := make([]byte, br.ValueSize()) |
||||
size, _ := br.ValueTo(buf) |
||||
return buf[:size] |
||||
} |
||||
|
||||
func (br *BackIndexRow) ValueSize() int { |
||||
birv := &BackIndexRowValue{ |
||||
TermEntries: br.termEntries, |
||||
StoredEntries: br.storedEntries, |
||||
} |
||||
return birv.Size() |
||||
} |
||||
|
||||
func (br *BackIndexRow) ValueTo(buf []byte) (int, error) { |
||||
birv := &BackIndexRowValue{ |
||||
TermEntries: br.termEntries, |
||||
StoredEntries: br.storedEntries, |
||||
} |
||||
return birv.MarshalTo(buf) |
||||
} |
||||
|
||||
func (br *BackIndexRow) String() string { |
||||
return fmt.Sprintf("Backindex DocId: `%s` Term Entries: %v, Stored Entries: %v", string(br.doc), br.termEntries, br.storedEntries) |
||||
} |
||||
|
||||
func NewBackIndexRow(docID []byte, entries []*BackIndexTermEntry, storedFields []*BackIndexStoreEntry) *BackIndexRow { |
||||
return &BackIndexRow{ |
||||
doc: docID, |
||||
termEntries: entries, |
||||
storedEntries: storedFields, |
||||
} |
||||
} |
||||
|
||||
// NewBackIndexRowKV decodes a BackIndexRow from a raw KV pair: the key
// carries the doc ID after the 'b' type byte, the value is a protobuf
// BackIndexRowValue.
func NewBackIndexRowKV(key, value []byte) (*BackIndexRow, error) {
	rv := BackIndexRow{}

	buf := bytes.NewBuffer(key)
	_, err := buf.ReadByte() // type
	if err != nil {
		return nil, err
	}

	// the doc ID usually runs to the end of the key (io.EOF); when a
	// separator is present it is trimmed off below
	rv.doc, err = buf.ReadBytes(ByteSeparator)
	if err == io.EOF && len(rv.doc) < 1 {
		err = fmt.Errorf("invalid doc length 0 - % x", key)
	}
	if err != nil && err != io.EOF {
		return nil, err
	} else if err == nil {
		rv.doc = rv.doc[:len(rv.doc)-1] // trim off separator byte
	}

	var birv BackIndexRowValue
	err = proto.Unmarshal(value, &birv)
	if err != nil {
		return nil, err
	}
	rv.termEntries = birv.TermEntries
	rv.storedEntries = birv.StoredEntries

	return &rv, nil
}
||||
|
||||
// STORED

// StoredRow holds one stored field value for a document. typ is a
// single-byte marker for the stored value's kind, and arrayPositions
// locate the value when the field is nested in arrays.
type StoredRow struct {
	doc            []byte
	field          uint16
	arrayPositions []uint64
	typ            byte
	value          []byte
}

// Key returns the encoded key:
// 's' + doc + ByteSeparator + field (2 bytes LE) + arrayPositions (uvarints).
func (s *StoredRow) Key() []byte {
	buf := make([]byte, s.KeySize())
	size, _ := s.KeyTo(buf)
	return buf[0:size]
}

// KeySize reports an upper bound on the encoded key length (each array
// position is budgeted at the maximum uvarint width).
func (s *StoredRow) KeySize() int {
	return 1 + len(s.doc) + 1 + 2 + (binary.MaxVarintLen64 * len(s.arrayPositions))
}

// KeyTo writes the encoded key into buf and returns the bytes used.
func (s *StoredRow) KeyTo(buf []byte) (int, error) {
	docLen := len(s.doc)
	buf[0] = 's'
	copy(buf[1:], s.doc)
	buf[1+docLen] = ByteSeparator
	binary.LittleEndian.PutUint16(buf[1+docLen+1:], s.field)
	bytesUsed := 1 + docLen + 1 + 2
	for _, arrayPosition := range s.arrayPositions {
		varbytes := binary.PutUvarint(buf[bytesUsed:], arrayPosition)
		bytesUsed += varbytes
	}
	return bytesUsed, nil
}

// Value returns the encoded value: the type byte followed by the raw
// stored bytes.
func (s *StoredRow) Value() []byte {
	buf := make([]byte, s.ValueSize())
	size, _ := s.ValueTo(buf)
	return buf[:size]
}

// ValueSize reports the encoded value length.
func (s *StoredRow) ValueSize() int {
	return len(s.value) + 1
}

// ValueTo writes the encoded value into buf and returns the bytes written.
func (s *StoredRow) ValueTo(buf []byte) (int, error) {
	buf[0] = s.typ
	used := copy(buf[1:], s.value)
	return used + 1, nil
}

// String implements fmt.Stringer for debugging output.
func (s *StoredRow) String() string {
	return fmt.Sprintf("Document: %s Field %d, Array Positions: %v, Type: %s Value: %s", s.doc, s.field, s.arrayPositions, string(s.typ), s.value)
}

// ScanPrefixForDoc returns the key prefix covering every stored row of
// this document: 's' + doc + ByteSeparator.
func (s *StoredRow) ScanPrefixForDoc() []byte {
	docLen := len(s.doc)
	buf := make([]byte, 1+docLen+1)
	buf[0] = 's'
	copy(buf[1:], s.doc)
	buf[1+docLen] = ByteSeparator
	return buf
}
||||
|
||||
func NewStoredRow(docID []byte, field uint16, arrayPositions []uint64, typ byte, value []byte) *StoredRow { |
||||
return &StoredRow{ |
||||
doc: docID, |
||||
field: field, |
||||
arrayPositions: arrayPositions, |
||||
typ: typ, |
||||
value: value, |
||||
} |
||||
} |
||||
|
||||
// NewStoredRowK decodes the key portion of a stored row: doc ID, field,
// and array positions. The typ/value fields are left at zero values.
func NewStoredRowK(key []byte) (*StoredRow, error) {
	rv := StoredRow{}

	buf := bytes.NewBuffer(key)
	_, err := buf.ReadByte() // type
	if err != nil {
		return nil, err
	}

	// NOTE(review): the error from ReadBytes is not checked beyond the
	// length test; if io.EOF is hit without a separator present, the trim
	// below removes a real doc byte — presumably keys are always
	// well-formed here; verify against the writers.
	rv.doc, err = buf.ReadBytes(ByteSeparator)
	if len(rv.doc) < 2 { // 1 for min doc id length, 1 for separator
		err = fmt.Errorf("invalid doc length 0")
		return nil, err
	}

	rv.doc = rv.doc[:len(rv.doc)-1] // trim off separator byte

	err = binary.Read(buf, binary.LittleEndian, &rv.field)
	if err != nil {
		return nil, err
	}

	// consume uvarint array positions until the buffer is exhausted
	rv.arrayPositions = make([]uint64, 0)
	nextArrayPos, err := binary.ReadUvarint(buf)
	for err == nil {
		rv.arrayPositions = append(rv.arrayPositions, nextArrayPos)
		nextArrayPos, err = binary.ReadUvarint(buf)
	}
	return &rv, nil
}
||||
|
||||
func NewStoredRowKV(key, value []byte) (*StoredRow, error) { |
||||
rv, err := NewStoredRowK(key) |
||||
if err != nil { |
||||
return nil, err |
||||
} |
||||
rv.typ = value[0] |
||||
rv.value = value[1:] |
||||
return rv, nil |
||||
} |
@ -0,0 +1,76 @@ |
||||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package upsidedown |
||||
|
||||
import ( |
||||
"encoding/binary" |
||||
) |
||||
|
||||
// mergeOperator is the KV-store merge operator used to maintain
// dictionary term counts without read-modify-write cycles.
var mergeOperator upsideDownMerge

// dictionaryTermIncr and dictionaryTermDecr are pre-encoded merge
// operands: little-endian 64-bit +1 and -1 deltas applied to the count
// of a dictionary row.
var dictionaryTermIncr []byte
var dictionaryTermDecr []byte

func init() {
	dictionaryTermIncr = make([]byte, 8)
	binary.LittleEndian.PutUint64(dictionaryTermIncr, uint64(1))
	dictionaryTermDecr = make([]byte, 8)
	var negOne = int64(-1)
	// -1 is stored as its two's-complement uint64 bit pattern
	binary.LittleEndian.PutUint64(dictionaryTermDecr, uint64(negOne))
}
||||
|
||||
type upsideDownMerge struct{} |
||||
|
||||
func (m *upsideDownMerge) FullMerge(key, existingValue []byte, operands [][]byte) ([]byte, bool) { |
||||
// set up record based on key
|
||||
dr, err := NewDictionaryRowK(key) |
||||
if err != nil { |
||||
return nil, false |
||||
} |
||||
if len(existingValue) > 0 { |
||||
// if existing value, parse it
|
||||
err = dr.parseDictionaryV(existingValue) |
||||
if err != nil { |
||||
return nil, false |
||||
} |
||||
} |
||||
|
||||
// now process operands
|
||||
for _, operand := range operands { |
||||
next := int64(binary.LittleEndian.Uint64(operand)) |
||||
if next < 0 && uint64(-next) > dr.count { |
||||
// subtracting next from existing would overflow
|
||||
dr.count = 0 |
||||
} else if next < 0 { |
||||
dr.count -= uint64(-next) |
||||
} else { |
||||
dr.count += uint64(next) |
||||
} |
||||
} |
||||
|
||||
return dr.Value(), true |
||||
} |
||||
|
||||
func (m *upsideDownMerge) PartialMerge(key, leftOperand, rightOperand []byte) ([]byte, bool) { |
||||
left := int64(binary.LittleEndian.Uint64(leftOperand)) |
||||
right := int64(binary.LittleEndian.Uint64(rightOperand)) |
||||
rv := make([]byte, 8) |
||||
binary.LittleEndian.PutUint64(rv, uint64(left+right)) |
||||
return rv, true |
||||
} |
||||
|
||||
func (m *upsideDownMerge) Name() string { |
||||
return "upsideDownMerge" |
||||
} |
@ -0,0 +1,55 @@ |
||||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package upsidedown |
||||
|
||||
import ( |
||||
"encoding/json" |
||||
"sync/atomic" |
||||
|
||||
"github.com/blevesearch/bleve/index/store" |
||||
) |
||||
|
||||
type indexStat struct { |
||||
updates, deletes, batches, errors uint64 |
||||
analysisTime, indexTime uint64 |
||||
termSearchersStarted uint64 |
||||
termSearchersFinished uint64 |
||||
numPlainTextBytesIndexed uint64 |
||||
i *UpsideDownCouch |
||||
} |
||||
|
||||
// statsMap returns the counters as a map suitable for JSON encoding.
// Each counter is loaded atomically, but the map as a whole is not a
// consistent point-in-time snapshot.
func (i *indexStat) statsMap() map[string]interface{} {
	m := map[string]interface{}{}
	m["updates"] = atomic.LoadUint64(&i.updates)
	m["deletes"] = atomic.LoadUint64(&i.deletes)
	m["batches"] = atomic.LoadUint64(&i.batches)
	m["errors"] = atomic.LoadUint64(&i.errors)
	m["analysis_time"] = atomic.LoadUint64(&i.analysisTime)
	m["index_time"] = atomic.LoadUint64(&i.indexTime)
	m["term_searchers_started"] = atomic.LoadUint64(&i.termSearchersStarted)
	m["term_searchers_finished"] = atomic.LoadUint64(&i.termSearchersFinished)
	m["num_plain_text_bytes_indexed"] = atomic.LoadUint64(&i.numPlainTextBytesIndexed)

	// Include the underlying KV store's stats when the store exposes them.
	if o, ok := i.i.store.(store.KVStoreStats); ok {
		m["kv"] = o.StatsMap()
	}

	return m
}
||||
|
||||
func (i *indexStat) MarshalJSON() ([]byte, error) { |
||||
m := i.statsMap() |
||||
return json.Marshal(m) |
||||
} |
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,684 @@ |
||||
// Code generated by protoc-gen-gogo.
|
||||
// source: upsidedown.proto
|
||||
// DO NOT EDIT!
|
||||
|
||||
/* |
||||
Package upsidedown is a generated protocol buffer package. |
||||
|
||||
It is generated from these files: |
||||
upsidedown.proto |
||||
|
||||
It has these top-level messages: |
||||
BackIndexTermEntry |
||||
BackIndexStoreEntry |
||||
BackIndexRowValue |
||||
*/ |
||||
package upsidedown |
||||
|
||||
import proto "github.com/golang/protobuf/proto" |
||||
import math "math" |
||||
|
||||
import io "io" |
||||
import fmt "fmt" |
||||
import github_com_golang_protobuf_proto "github.com/golang/protobuf/proto" |
||||
|
||||
// Reference imports to suppress errors if they are not otherwise used.
|
||||
var _ = proto.Marshal |
||||
var _ = math.Inf |
||||
|
||||
// BackIndexTermEntry records one indexed (term, field) pair for a document
// in the back index. Generated by protoc-gen-gogo from upsidedown.proto;
// both fields are `required` in the schema, hence the pointer types.
type BackIndexTermEntry struct {
	Term             *string `protobuf:"bytes,1,req,name=term" json:"term,omitempty"`
	Field            *uint32 `protobuf:"varint,2,req,name=field" json:"field,omitempty"`
	XXX_unrecognized []byte  `json:"-"` // raw bytes of unknown fields, preserved across round-trips
}
||||
|
||||
// Reset clears the message to its zero value.
func (m *BackIndexTermEntry) Reset() { *m = BackIndexTermEntry{} }

// String renders the message in proto compact text format.
func (m *BackIndexTermEntry) String() string { return proto.CompactTextString(m) }

// ProtoMessage tags the type as a protobuf message.
func (*BackIndexTermEntry) ProtoMessage() {}

// GetTerm returns the term, or "" when the message or field is nil.
func (m *BackIndexTermEntry) GetTerm() string {
	if m != nil && m.Term != nil {
		return *m.Term
	}
	return ""
}

// GetField returns the field number, or 0 when the message or field is nil.
func (m *BackIndexTermEntry) GetField() uint32 {
	if m != nil && m.Field != nil {
		return *m.Field
	}
	return 0
}
||||
|
||||
// BackIndexStoreEntry records one stored field (and its array positions)
// for a document in the back index. Generated by protoc-gen-gogo.
type BackIndexStoreEntry struct {
	Field            *uint32  `protobuf:"varint,1,req,name=field" json:"field,omitempty"`
	ArrayPositions   []uint64 `protobuf:"varint,2,rep,name=arrayPositions" json:"arrayPositions,omitempty"`
	XXX_unrecognized []byte   `json:"-"` // raw bytes of unknown fields
}

// Reset clears the message to its zero value.
func (m *BackIndexStoreEntry) Reset() { *m = BackIndexStoreEntry{} }

// String renders the message in proto compact text format.
func (m *BackIndexStoreEntry) String() string { return proto.CompactTextString(m) }

// ProtoMessage tags the type as a protobuf message.
func (*BackIndexStoreEntry) ProtoMessage() {}

// GetField returns the field number, or 0 when the message or field is nil.
func (m *BackIndexStoreEntry) GetField() uint32 {
	if m != nil && m.Field != nil {
		return *m.Field
	}
	return 0
}

// GetArrayPositions returns the array positions slice (nil when m is nil).
func (m *BackIndexStoreEntry) GetArrayPositions() []uint64 {
	if m != nil {
		return m.ArrayPositions
	}
	return nil
}
||||
|
||||
// BackIndexRowValue is the value stored in a back-index row: all term
// entries and stored-field entries for one document. Generated by
// protoc-gen-gogo.
type BackIndexRowValue struct {
	TermEntries      []*BackIndexTermEntry  `protobuf:"bytes,1,rep,name=termEntries" json:"termEntries,omitempty"`
	StoredEntries    []*BackIndexStoreEntry `protobuf:"bytes,2,rep,name=storedEntries" json:"storedEntries,omitempty"`
	XXX_unrecognized []byte                 `json:"-"` // raw bytes of unknown fields
}

// Reset clears the message to its zero value.
func (m *BackIndexRowValue) Reset() { *m = BackIndexRowValue{} }

// String renders the message in proto compact text format.
func (m *BackIndexRowValue) String() string { return proto.CompactTextString(m) }

// ProtoMessage tags the type as a protobuf message.
func (*BackIndexRowValue) ProtoMessage() {}

// GetTermEntries returns the term entries slice (nil when m is nil).
func (m *BackIndexRowValue) GetTermEntries() []*BackIndexTermEntry {
	if m != nil {
		return m.TermEntries
	}
	return nil
}

// GetStoredEntries returns the stored entries slice (nil when m is nil).
func (m *BackIndexRowValue) GetStoredEntries() []*BackIndexStoreEntry {
	if m != nil {
		return m.StoredEntries
	}
	return nil
}
||||
|
||||
// Unmarshal decodes the protobuf wire format in data into m. Unknown
// fields are preserved in XXX_unrecognized. A RequiredNotSetError is
// returned if either required field (term, field) is absent.
func (m *BackIndexTermEntry) Unmarshal(data []byte) error {
	var hasFields [1]uint64 // bitmask of required fields seen
	l := len(data)
	iNdEx := 0
	for iNdEx < l {
		// decode the key varint: (field number << 3) | wire type
		var wire uint64
		for shift := uint(0); ; shift += 7 {
			if iNdEx >= l {
				return io.ErrUnexpectedEOF
			}
			b := data[iNdEx]
			iNdEx++
			wire |= (uint64(b) & 0x7F) << shift
			if b < 0x80 {
				break
			}
		}
		fieldNum := int32(wire >> 3)
		wireType := int(wire & 0x7)
		switch fieldNum {
		case 1:
			if wireType != 2 {
				return fmt.Errorf("proto: wrong wireType = %d for field Term", wireType)
			}
			// length-delimited string: length varint, then the bytes
			var stringLen uint64
			for shift := uint(0); ; shift += 7 {
				if iNdEx >= l {
					return io.ErrUnexpectedEOF
				}
				b := data[iNdEx]
				iNdEx++
				stringLen |= (uint64(b) & 0x7F) << shift
				if b < 0x80 {
					break
				}
			}
			postIndex := iNdEx + int(stringLen)
			if postIndex > l {
				return io.ErrUnexpectedEOF
			}
			s := string(data[iNdEx:postIndex])
			m.Term = &s
			iNdEx = postIndex
			hasFields[0] |= uint64(0x00000001)
		case 2:
			if wireType != 0 {
				return fmt.Errorf("proto: wrong wireType = %d for field Field", wireType)
			}
			var v uint32
			for shift := uint(0); ; shift += 7 {
				if iNdEx >= l {
					return io.ErrUnexpectedEOF
				}
				b := data[iNdEx]
				iNdEx++
				v |= (uint32(b) & 0x7F) << shift
				if b < 0x80 {
					break
				}
			}
			m.Field = &v
			hasFields[0] |= uint64(0x00000002)
		default:
			// unknown field: rewind to the start of the key varint,
			// then skip the whole entry, stashing its raw bytes
			var sizeOfWire int
			for {
				sizeOfWire++
				wire >>= 7
				if wire == 0 {
					break
				}
			}
			iNdEx -= sizeOfWire
			skippy, err := skipUpsidedown(data[iNdEx:])
			if err != nil {
				return err
			}
			if skippy < 0 {
				return ErrInvalidLengthUpsidedown
			}
			if (iNdEx + skippy) > l {
				return io.ErrUnexpectedEOF
			}
			m.XXX_unrecognized = append(m.XXX_unrecognized, data[iNdEx:iNdEx+skippy]...)
			iNdEx += skippy
		}
	}
	// enforce `required` semantics for term (bit 1) and field (bit 2)
	if hasFields[0]&uint64(0x00000001) == 0 {
		return new(github_com_golang_protobuf_proto.RequiredNotSetError)
	}
	if hasFields[0]&uint64(0x00000002) == 0 {
		return new(github_com_golang_protobuf_proto.RequiredNotSetError)
	}

	return nil
}
||||
// Unmarshal decodes the protobuf wire format in data into m. Unknown
// fields are preserved in XXX_unrecognized. A RequiredNotSetError is
// returned if the required field (field) is absent.
func (m *BackIndexStoreEntry) Unmarshal(data []byte) error {
	var hasFields [1]uint64 // bitmask of required fields seen
	l := len(data)
	iNdEx := 0
	for iNdEx < l {
		// decode the key varint: (field number << 3) | wire type
		var wire uint64
		for shift := uint(0); ; shift += 7 {
			if iNdEx >= l {
				return io.ErrUnexpectedEOF
			}
			b := data[iNdEx]
			iNdEx++
			wire |= (uint64(b) & 0x7F) << shift
			if b < 0x80 {
				break
			}
		}
		fieldNum := int32(wire >> 3)
		wireType := int(wire & 0x7)
		switch fieldNum {
		case 1:
			if wireType != 0 {
				return fmt.Errorf("proto: wrong wireType = %d for field Field", wireType)
			}
			var v uint32
			for shift := uint(0); ; shift += 7 {
				if iNdEx >= l {
					return io.ErrUnexpectedEOF
				}
				b := data[iNdEx]
				iNdEx++
				v |= (uint32(b) & 0x7F) << shift
				if b < 0x80 {
					break
				}
			}
			m.Field = &v
			hasFields[0] |= uint64(0x00000001)
		case 2:
			if wireType != 0 {
				return fmt.Errorf("proto: wrong wireType = %d for field ArrayPositions", wireType)
			}
			// repeated (non-packed) varint: one value per occurrence
			var v uint64
			for shift := uint(0); ; shift += 7 {
				if iNdEx >= l {
					return io.ErrUnexpectedEOF
				}
				b := data[iNdEx]
				iNdEx++
				v |= (uint64(b) & 0x7F) << shift
				if b < 0x80 {
					break
				}
			}
			m.ArrayPositions = append(m.ArrayPositions, v)
		default:
			// unknown field: rewind past the key varint and skip the entry,
			// stashing its raw bytes
			var sizeOfWire int
			for {
				sizeOfWire++
				wire >>= 7
				if wire == 0 {
					break
				}
			}
			iNdEx -= sizeOfWire
			skippy, err := skipUpsidedown(data[iNdEx:])
			if err != nil {
				return err
			}
			if skippy < 0 {
				return ErrInvalidLengthUpsidedown
			}
			if (iNdEx + skippy) > l {
				return io.ErrUnexpectedEOF
			}
			m.XXX_unrecognized = append(m.XXX_unrecognized, data[iNdEx:iNdEx+skippy]...)
			iNdEx += skippy
		}
	}
	// enforce `required` semantics for field (bit 1)
	if hasFields[0]&uint64(0x00000001) == 0 {
		return new(github_com_golang_protobuf_proto.RequiredNotSetError)
	}

	return nil
}
||||
// Unmarshal decodes the protobuf wire format in data into m. Both fields
// are repeated embedded messages; each occurrence is appended and decoded
// recursively. Unknown fields are preserved in XXX_unrecognized.
func (m *BackIndexRowValue) Unmarshal(data []byte) error {
	l := len(data)
	iNdEx := 0
	for iNdEx < l {
		// decode the key varint: (field number << 3) | wire type
		var wire uint64
		for shift := uint(0); ; shift += 7 {
			if iNdEx >= l {
				return io.ErrUnexpectedEOF
			}
			b := data[iNdEx]
			iNdEx++
			wire |= (uint64(b) & 0x7F) << shift
			if b < 0x80 {
				break
			}
		}
		fieldNum := int32(wire >> 3)
		wireType := int(wire & 0x7)
		switch fieldNum {
		case 1:
			if wireType != 2 {
				return fmt.Errorf("proto: wrong wireType = %d for field TermEntries", wireType)
			}
			// length-delimited embedded message
			var msglen int
			for shift := uint(0); ; shift += 7 {
				if iNdEx >= l {
					return io.ErrUnexpectedEOF
				}
				b := data[iNdEx]
				iNdEx++
				msglen |= (int(b) & 0x7F) << shift
				if b < 0x80 {
					break
				}
			}
			postIndex := iNdEx + msglen
			if msglen < 0 {
				return ErrInvalidLengthUpsidedown
			}
			if postIndex > l {
				return io.ErrUnexpectedEOF
			}
			m.TermEntries = append(m.TermEntries, &BackIndexTermEntry{})
			if err := m.TermEntries[len(m.TermEntries)-1].Unmarshal(data[iNdEx:postIndex]); err != nil {
				return err
			}
			iNdEx = postIndex
		case 2:
			if wireType != 2 {
				return fmt.Errorf("proto: wrong wireType = %d for field StoredEntries", wireType)
			}
			// length-delimited embedded message
			var msglen int
			for shift := uint(0); ; shift += 7 {
				if iNdEx >= l {
					return io.ErrUnexpectedEOF
				}
				b := data[iNdEx]
				iNdEx++
				msglen |= (int(b) & 0x7F) << shift
				if b < 0x80 {
					break
				}
			}
			postIndex := iNdEx + msglen
			if msglen < 0 {
				return ErrInvalidLengthUpsidedown
			}
			if postIndex > l {
				return io.ErrUnexpectedEOF
			}
			m.StoredEntries = append(m.StoredEntries, &BackIndexStoreEntry{})
			if err := m.StoredEntries[len(m.StoredEntries)-1].Unmarshal(data[iNdEx:postIndex]); err != nil {
				return err
			}
			iNdEx = postIndex
		default:
			// unknown field: rewind past the key varint and skip the entry,
			// stashing its raw bytes
			var sizeOfWire int
			for {
				sizeOfWire++
				wire >>= 7
				if wire == 0 {
					break
				}
			}
			iNdEx -= sizeOfWire
			skippy, err := skipUpsidedown(data[iNdEx:])
			if err != nil {
				return err
			}
			if skippy < 0 {
				return ErrInvalidLengthUpsidedown
			}
			if (iNdEx + skippy) > l {
				return io.ErrUnexpectedEOF
			}
			m.XXX_unrecognized = append(m.XXX_unrecognized, data[iNdEx:iNdEx+skippy]...)
			iNdEx += skippy
		}
	}

	return nil
}
||||
// skipUpsidedown returns the number of bytes occupied by the next complete
// protobuf entry (key + value) at the start of data, so callers can skip
// unknown fields. It dispatches on the wire type: varint (0), fixed64 (1),
// length-delimited (2), deprecated groups (3/4), fixed32 (5).
func skipUpsidedown(data []byte) (n int, err error) {
	l := len(data)
	iNdEx := 0
	for iNdEx < l {
		// decode the key varint to learn the wire type
		var wire uint64
		for shift := uint(0); ; shift += 7 {
			if iNdEx >= l {
				return 0, io.ErrUnexpectedEOF
			}
			b := data[iNdEx]
			iNdEx++
			wire |= (uint64(b) & 0x7F) << shift
			if b < 0x80 {
				break
			}
		}
		wireType := int(wire & 0x7)
		switch wireType {
		case 0:
			// varint: consume bytes until the continuation bit clears
			for {
				if iNdEx >= l {
					return 0, io.ErrUnexpectedEOF
				}
				iNdEx++
				if data[iNdEx-1] < 0x80 {
					break
				}
			}
			return iNdEx, nil
		case 1:
			// fixed64
			iNdEx += 8
			return iNdEx, nil
		case 2:
			// length-delimited: length varint, then that many bytes
			var length int
			for shift := uint(0); ; shift += 7 {
				if iNdEx >= l {
					return 0, io.ErrUnexpectedEOF
				}
				b := data[iNdEx]
				iNdEx++
				length |= (int(b) & 0x7F) << shift
				if b < 0x80 {
					break
				}
			}
			iNdEx += length
			if length < 0 {
				return 0, ErrInvalidLengthUpsidedown
			}
			return iNdEx, nil
		case 3:
			// deprecated group start: skip nested entries until the
			// matching group-end (wire type 4) is seen
			for {
				var innerWire uint64
				var start int = iNdEx
				for shift := uint(0); ; shift += 7 {
					if iNdEx >= l {
						return 0, io.ErrUnexpectedEOF
					}
					b := data[iNdEx]
					iNdEx++
					innerWire |= (uint64(b) & 0x7F) << shift
					if b < 0x80 {
						break
					}
				}
				innerWireType := int(innerWire & 0x7)
				if innerWireType == 4 {
					break
				}
				next, err := skipUpsidedown(data[start:])
				if err != nil {
					return 0, err
				}
				iNdEx = start + next
			}
			return iNdEx, nil
		case 4:
			// group end with no matching start: nothing more to skip
			return iNdEx, nil
		case 5:
			// fixed32
			iNdEx += 4
			return iNdEx, nil
		default:
			return 0, fmt.Errorf("proto: illegal wireType %d", wireType)
		}
	}
	panic("unreachable")
}
||||
|
||||
var ( |
||||
ErrInvalidLengthUpsidedown = fmt.Errorf("proto: negative length found during unmarshaling") |
||||
) |
||||
|
||||
// Size returns the encoded wire-format size of m in bytes; each present
// field contributes one key byte plus its payload.
func (m *BackIndexTermEntry) Size() (n int) {
	var l int
	_ = l
	if m.Term != nil {
		l = len(*m.Term)
		n += 1 + l + sovUpsidedown(uint64(l))
	}
	if m.Field != nil {
		n += 1 + sovUpsidedown(uint64(*m.Field))
	}
	if m.XXX_unrecognized != nil {
		n += len(m.XXX_unrecognized)
	}
	return n
}
||||
|
||||
// Size returns the encoded wire-format size of m in bytes; each array
// position is a separately keyed (non-packed) varint.
func (m *BackIndexStoreEntry) Size() (n int) {
	var l int
	_ = l
	if m.Field != nil {
		n += 1 + sovUpsidedown(uint64(*m.Field))
	}
	if len(m.ArrayPositions) > 0 {
		for _, e := range m.ArrayPositions {
			n += 1 + sovUpsidedown(uint64(e))
		}
	}
	if m.XXX_unrecognized != nil {
		n += len(m.XXX_unrecognized)
	}
	return n
}
||||
|
||||
// Size returns the encoded wire-format size of m in bytes; each embedded
// message contributes a key byte, a length varint, and its own size.
func (m *BackIndexRowValue) Size() (n int) {
	var l int
	_ = l
	if len(m.TermEntries) > 0 {
		for _, e := range m.TermEntries {
			l = e.Size()
			n += 1 + l + sovUpsidedown(uint64(l))
		}
	}
	if len(m.StoredEntries) > 0 {
		for _, e := range m.StoredEntries {
			l = e.Size()
			n += 1 + l + sovUpsidedown(uint64(l))
		}
	}
	if m.XXX_unrecognized != nil {
		n += len(m.XXX_unrecognized)
	}
	return n
}
||||
|
||||
// sovUpsidedown returns the number of bytes needed to encode x as a
// protobuf varint (7 payload bits per byte, minimum one byte).
func sovUpsidedown(x uint64) (n int) {
	n = 1
	for x >= 0x80 {
		x >>= 7
		n++
	}
	return n
}

// sozUpsidedown returns the varint size of x after zig-zag encoding
// (used for sint fields).
func sozUpsidedown(x uint64) (n int) {
	zigzag := (x << 1) ^ uint64(int64(x)>>63)
	return sovUpsidedown(zigzag)
}
||||
// Marshal encodes m to a freshly allocated wire-format buffer.
func (m *BackIndexTermEntry) Marshal() (data []byte, err error) {
	size := m.Size()
	data = make([]byte, size)
	n, err := m.MarshalTo(data)
	if err != nil {
		return nil, err
	}
	return data[:n], nil
}

// MarshalTo encodes m into data (which must be at least m.Size() bytes)
// and returns the number of bytes written. Missing required fields yield
// a RequiredNotSetError.
func (m *BackIndexTermEntry) MarshalTo(data []byte) (n int, err error) {
	var i int
	_ = i
	var l int
	_ = l
	if m.Term == nil {
		return 0, new(github_com_golang_protobuf_proto.RequiredNotSetError)
	} else {
		data[i] = 0xa // key: field 1, wire type 2 (length-delimited)
		i++
		i = encodeVarintUpsidedown(data, i, uint64(len(*m.Term)))
		i += copy(data[i:], *m.Term)
	}
	if m.Field == nil {
		return 0, new(github_com_golang_protobuf_proto.RequiredNotSetError)
	} else {
		data[i] = 0x10 // key: field 2, wire type 0 (varint)
		i++
		i = encodeVarintUpsidedown(data, i, uint64(*m.Field))
	}
	if m.XXX_unrecognized != nil {
		i += copy(data[i:], m.XXX_unrecognized)
	}
	return i, nil
}
||||
|
||||
// Marshal encodes m to a freshly allocated wire-format buffer.
func (m *BackIndexStoreEntry) Marshal() (data []byte, err error) {
	size := m.Size()
	data = make([]byte, size)
	n, err := m.MarshalTo(data)
	if err != nil {
		return nil, err
	}
	return data[:n], nil
}

// MarshalTo encodes m into data (which must be at least m.Size() bytes)
// and returns the number of bytes written. A missing required field
// yields a RequiredNotSetError.
func (m *BackIndexStoreEntry) MarshalTo(data []byte) (n int, err error) {
	var i int
	_ = i
	var l int
	_ = l
	if m.Field == nil {
		return 0, new(github_com_golang_protobuf_proto.RequiredNotSetError)
	} else {
		data[i] = 0x8 // key: field 1, wire type 0 (varint)
		i++
		i = encodeVarintUpsidedown(data, i, uint64(*m.Field))
	}
	if len(m.ArrayPositions) > 0 {
		for _, num := range m.ArrayPositions {
			data[i] = 0x10 // key: field 2, wire type 0 (varint), one per element
			i++
			i = encodeVarintUpsidedown(data, i, uint64(num))
		}
	}
	if m.XXX_unrecognized != nil {
		i += copy(data[i:], m.XXX_unrecognized)
	}
	return i, nil
}
||||
|
||||
// Marshal encodes m to a freshly allocated wire-format buffer.
func (m *BackIndexRowValue) Marshal() (data []byte, err error) {
	size := m.Size()
	data = make([]byte, size)
	n, err := m.MarshalTo(data)
	if err != nil {
		return nil, err
	}
	return data[:n], nil
}

// MarshalTo encodes m into data (which must be at least m.Size() bytes)
// and returns the number of bytes written. Each embedded message is
// written as key, length varint, then its own encoding.
func (m *BackIndexRowValue) MarshalTo(data []byte) (n int, err error) {
	var i int
	_ = i
	var l int
	_ = l
	if len(m.TermEntries) > 0 {
		for _, msg := range m.TermEntries {
			data[i] = 0xa // key: field 1, wire type 2 (length-delimited)
			i++
			i = encodeVarintUpsidedown(data, i, uint64(msg.Size()))
			n, err := msg.MarshalTo(data[i:])
			if err != nil {
				return 0, err
			}
			i += n
		}
	}
	if len(m.StoredEntries) > 0 {
		for _, msg := range m.StoredEntries {
			data[i] = 0x12 // key: field 2, wire type 2 (length-delimited)
			i++
			i = encodeVarintUpsidedown(data, i, uint64(msg.Size()))
			n, err := msg.MarshalTo(data[i:])
			if err != nil {
				return 0, err
			}
			i += n
		}
	}
	if m.XXX_unrecognized != nil {
		i += copy(data[i:], m.XXX_unrecognized)
	}
	return i, nil
}
||||
|
||||
// encodeFixed64Upsidedown writes v little-endian into data at offset and
// returns the offset just past the 8 bytes written.
func encodeFixed64Upsidedown(data []byte, offset int, v uint64) int {
	for b := 0; b < 8; b++ {
		data[offset+b] = uint8(v >> (8 * uint(b)))
	}
	return offset + 8
}

// encodeFixed32Upsidedown writes v little-endian into data at offset and
// returns the offset just past the 4 bytes written.
func encodeFixed32Upsidedown(data []byte, offset int, v uint32) int {
	for b := 0; b < 4; b++ {
		data[offset+b] = uint8(v >> (8 * uint(b)))
	}
	return offset + 4
}

// encodeVarintUpsidedown writes v as a protobuf varint into data at offset
// and returns the offset just past the final byte.
func encodeVarintUpsidedown(data []byte, offset int, v uint64) int {
	for v >= 0x80 {
		data[offset] = uint8(v) | 0x80 // set continuation bit
		v >>= 7
		offset++
	}
	data[offset] = uint8(v)
	return offset + 1
}
@ -0,0 +1,14 @@ |
||||
// BackIndexTermEntry records one indexed (term, field) pair for a document.
message BackIndexTermEntry {
	required string term = 1;
	required uint32 field = 2;
}

// BackIndexStoreEntry records one stored field and its array positions.
message BackIndexStoreEntry {
	required uint32 field = 1;
	repeated uint64 arrayPositions = 2;
}

// BackIndexRowValue is the value of a back-index row: all term and stored
// entries for one document.
message BackIndexRowValue {
	repeated BackIndexTermEntry termEntries = 1;
	repeated BackIndexStoreEntry storedEntries = 2;
}
@ -0,0 +1,37 @@ |
||||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package bleve |
||||
|
||||
// An IndexAlias is a wrapper around one or more
|
||||
// Index objects. It has two distinct modes of
|
||||
// operation.
|
||||
// 1. When it points to a single index, ALL index
|
||||
// operations are valid and will be passed through
|
||||
// to the underlying index.
|
||||
// 2. When it points to more than one index, the only
|
||||
// valid operation is Search. In this case the
|
||||
// search will be performed across all the
|
||||
// underlying indexes and the results merged.
|
||||
// Calls to Add/Remove/Swap the underlying indexes
|
||||
// are atomic, so you can safely change the
|
||||
// underlying Index objects while other components
|
||||
// are performing operations.
|
||||
type IndexAlias interface {
	Index

	// Add appends the given indexes to the alias.
	Add(i ...Index)
	// Remove drops the given indexes from the alias.
	Remove(i ...Index)
	// Swap atomically adds the indexes in `in` and removes those in `out`.
	Swap(in, out []Index)
}
@ -0,0 +1,605 @@ |
||||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package bleve |
||||
|
||||
import ( |
||||
"sort" |
||||
"sync" |
||||
"time" |
||||
|
||||
"golang.org/x/net/context" |
||||
|
||||
"github.com/blevesearch/bleve/document" |
||||
"github.com/blevesearch/bleve/index" |
||||
"github.com/blevesearch/bleve/index/store" |
||||
"github.com/blevesearch/bleve/mapping" |
||||
"github.com/blevesearch/bleve/search" |
||||
) |
||||
|
||||
// indexAliasImpl is the concrete IndexAlias implementation. mutex guards
// indexes and open: reads take RLock, membership changes and Close take
// the write lock.
type indexAliasImpl struct {
	name    string
	indexes []Index
	mutex   sync.RWMutex
	open    bool // false once Close has been called
}
||||
|
||||
// NewIndexAlias creates a new IndexAlias over the provided
|
||||
// Index objects.
|
||||
func NewIndexAlias(indexes ...Index) *indexAliasImpl { |
||||
return &indexAliasImpl{ |
||||
name: "alias", |
||||
indexes: indexes, |
||||
open: true, |
||||
} |
||||
} |
||||
|
||||
func (i *indexAliasImpl) isAliasToSingleIndex() error { |
||||
if len(i.indexes) < 1 { |
||||
return ErrorAliasEmpty |
||||
} else if len(i.indexes) > 1 { |
||||
return ErrorAliasMulti |
||||
} |
||||
return nil |
||||
} |
||||
|
||||
func (i *indexAliasImpl) Index(id string, data interface{}) error { |
||||
i.mutex.RLock() |
||||
defer i.mutex.RUnlock() |
||||
|
||||
if !i.open { |
||||
return ErrorIndexClosed |
||||
} |
||||
|
||||
err := i.isAliasToSingleIndex() |
||||
if err != nil { |
||||
return err |
||||
} |
||||
|
||||
return i.indexes[0].Index(id, data) |
||||
} |
||||
|
||||
func (i *indexAliasImpl) Delete(id string) error { |
||||
i.mutex.RLock() |
||||
defer i.mutex.RUnlock() |
||||
|
||||
if !i.open { |
||||
return ErrorIndexClosed |
||||
} |
||||
|
||||
err := i.isAliasToSingleIndex() |
||||
if err != nil { |
||||
return err |
||||
} |
||||
|
||||
return i.indexes[0].Delete(id) |
||||
} |
||||
|
||||
func (i *indexAliasImpl) Batch(b *Batch) error { |
||||
i.mutex.RLock() |
||||
defer i.mutex.RUnlock() |
||||
|
||||
if !i.open { |
||||
return ErrorIndexClosed |
||||
} |
||||
|
||||
err := i.isAliasToSingleIndex() |
||||
if err != nil { |
||||
return err |
||||
} |
||||
|
||||
return i.indexes[0].Batch(b) |
||||
} |
||||
|
||||
func (i *indexAliasImpl) Document(id string) (*document.Document, error) { |
||||
i.mutex.RLock() |
||||
defer i.mutex.RUnlock() |
||||
|
||||
if !i.open { |
||||
return nil, ErrorIndexClosed |
||||
} |
||||
|
||||
err := i.isAliasToSingleIndex() |
||||
if err != nil { |
||||
return nil, err |
||||
} |
||||
|
||||
return i.indexes[0].Document(id) |
||||
} |
||||
|
||||
func (i *indexAliasImpl) DocCount() (uint64, error) { |
||||
i.mutex.RLock() |
||||
defer i.mutex.RUnlock() |
||||
|
||||
rv := uint64(0) |
||||
|
||||
if !i.open { |
||||
return 0, ErrorIndexClosed |
||||
} |
||||
|
||||
for _, index := range i.indexes { |
||||
otherCount, err := index.DocCount() |
||||
if err == nil { |
||||
rv += otherCount |
||||
} |
||||
// tolerate errors to produce partial counts
|
||||
} |
||||
|
||||
return rv, nil |
||||
} |
||||
|
||||
func (i *indexAliasImpl) Search(req *SearchRequest) (*SearchResult, error) { |
||||
return i.SearchInContext(context.Background(), req) |
||||
} |
||||
|
||||
func (i *indexAliasImpl) SearchInContext(ctx context.Context, req *SearchRequest) (*SearchResult, error) { |
||||
i.mutex.RLock() |
||||
defer i.mutex.RUnlock() |
||||
|
||||
if !i.open { |
||||
return nil, ErrorIndexClosed |
||||
} |
||||
|
||||
if len(i.indexes) < 1 { |
||||
return nil, ErrorAliasEmpty |
||||
} |
||||
|
||||
// short circuit the simple case
|
||||
if len(i.indexes) == 1 { |
||||
return i.indexes[0].SearchInContext(ctx, req) |
||||
} |
||||
|
||||
return MultiSearch(ctx, req, i.indexes...) |
||||
} |
||||
|
||||
func (i *indexAliasImpl) Fields() ([]string, error) { |
||||
i.mutex.RLock() |
||||
defer i.mutex.RUnlock() |
||||
|
||||
if !i.open { |
||||
return nil, ErrorIndexClosed |
||||
} |
||||
|
||||
err := i.isAliasToSingleIndex() |
||||
if err != nil { |
||||
return nil, err |
||||
} |
||||
|
||||
return i.indexes[0].Fields() |
||||
} |
||||
|
||||
// FieldDict returns the field dictionary of the single underlying index.
// On success the read lock is deliberately NOT released here — it is
// presumably held until the returned indexAliasImplFieldDict is closed.
// NOTE(review): confirm against indexAliasImplFieldDict's Close.
func (i *indexAliasImpl) FieldDict(field string) (index.FieldDict, error) {
	i.mutex.RLock()

	if !i.open {
		i.mutex.RUnlock()
		return nil, ErrorIndexClosed
	}

	err := i.isAliasToSingleIndex()
	if err != nil {
		i.mutex.RUnlock()
		return nil, err
	}

	fieldDict, err := i.indexes[0].FieldDict(field)
	if err != nil {
		i.mutex.RUnlock()
		return nil, err
	}

	// wrap so the alias can release the lock when the dict is closed
	return &indexAliasImplFieldDict{
		index:     i,
		fieldDict: fieldDict,
	}, nil
}
||||
|
||||
// FieldDictRange returns the field dictionary restricted to
// [startTerm, endTerm] from the single underlying index. As in FieldDict,
// the read lock is deliberately NOT released on the success path — it is
// presumably held until the returned wrapper is closed (confirm).
func (i *indexAliasImpl) FieldDictRange(field string, startTerm []byte, endTerm []byte) (index.FieldDict, error) {
	i.mutex.RLock()

	if !i.open {
		i.mutex.RUnlock()
		return nil, ErrorIndexClosed
	}

	err := i.isAliasToSingleIndex()
	if err != nil {
		i.mutex.RUnlock()
		return nil, err
	}

	fieldDict, err := i.indexes[0].FieldDictRange(field, startTerm, endTerm)
	if err != nil {
		i.mutex.RUnlock()
		return nil, err
	}

	return &indexAliasImplFieldDict{
		index:     i,
		fieldDict: fieldDict,
	}, nil
}
||||
|
||||
// FieldDictPrefix returns the field dictionary restricted to terms with
// the given prefix from the single underlying index. As in FieldDict, the
// read lock is deliberately NOT released on the success path — it is
// presumably held until the returned wrapper is closed (confirm).
func (i *indexAliasImpl) FieldDictPrefix(field string, termPrefix []byte) (index.FieldDict, error) {
	i.mutex.RLock()

	if !i.open {
		i.mutex.RUnlock()
		return nil, ErrorIndexClosed
	}

	err := i.isAliasToSingleIndex()
	if err != nil {
		i.mutex.RUnlock()
		return nil, err
	}

	fieldDict, err := i.indexes[0].FieldDictPrefix(field, termPrefix)
	if err != nil {
		i.mutex.RUnlock()
		return nil, err
	}

	return &indexAliasImplFieldDict{
		index:     i,
		fieldDict: fieldDict,
	}, nil
}
||||
|
||||
func (i *indexAliasImpl) Close() error { |
||||
i.mutex.Lock() |
||||
defer i.mutex.Unlock() |
||||
|
||||
i.open = false |
||||
return nil |
||||
} |
||||
|
||||
func (i *indexAliasImpl) Mapping() mapping.IndexMapping { |
||||
i.mutex.RLock() |
||||
defer i.mutex.RUnlock() |
||||
|
||||
if !i.open { |
||||
return nil |
||||
} |
||||
|
||||
err := i.isAliasToSingleIndex() |
||||
if err != nil { |
||||
return nil |
||||
} |
||||
|
||||
return i.indexes[0].Mapping() |
||||
} |
||||
|
||||
func (i *indexAliasImpl) Stats() *IndexStat { |
||||
i.mutex.RLock() |
||||
defer i.mutex.RUnlock() |
||||
|
||||
if !i.open { |
||||
return nil |
||||
} |
||||
|
||||
err := i.isAliasToSingleIndex() |
||||
if err != nil { |
||||
return nil |
||||
} |
||||
|
||||
return i.indexes[0].Stats() |
||||
} |
||||
|
||||
func (i *indexAliasImpl) StatsMap() map[string]interface{} { |
||||
i.mutex.RLock() |
||||
defer i.mutex.RUnlock() |
||||
|
||||
if !i.open { |
||||
return nil |
||||
} |
||||
|
||||
err := i.isAliasToSingleIndex() |
||||
if err != nil { |
||||
return nil |
||||
} |
||||
|
||||
return i.indexes[0].StatsMap() |
||||
} |
||||
|
||||
func (i *indexAliasImpl) GetInternal(key []byte) ([]byte, error) { |
||||
i.mutex.RLock() |
||||
defer i.mutex.RUnlock() |
||||
|
||||
if !i.open { |
||||
return nil, ErrorIndexClosed |
||||
} |
||||
|
||||
err := i.isAliasToSingleIndex() |
||||
if err != nil { |
||||
return nil, err |
||||
} |
||||
|
||||
return i.indexes[0].GetInternal(key) |
||||
} |
||||
|
||||
func (i *indexAliasImpl) SetInternal(key, val []byte) error { |
||||
i.mutex.RLock() |
||||
defer i.mutex.RUnlock() |
||||
|
||||
if !i.open { |
||||
return ErrorIndexClosed |
||||
} |
||||
|
||||
err := i.isAliasToSingleIndex() |
||||
if err != nil { |
||||
return err |
||||
} |
||||
|
||||
return i.indexes[0].SetInternal(key, val) |
||||
} |
||||
|
||||
func (i *indexAliasImpl) DeleteInternal(key []byte) error { |
||||
i.mutex.RLock() |
||||
defer i.mutex.RUnlock() |
||||
|
||||
if !i.open { |
||||
return ErrorIndexClosed |
||||
} |
||||
|
||||
err := i.isAliasToSingleIndex() |
||||
if err != nil { |
||||
return err |
||||
} |
||||
|
||||
return i.indexes[0].DeleteInternal(key) |
||||
} |
||||
|
||||
func (i *indexAliasImpl) Advanced() (index.Index, store.KVStore, error) { |
||||
i.mutex.RLock() |
||||
defer i.mutex.RUnlock() |
||||
|
||||
if !i.open { |
||||
return nil, nil, ErrorIndexClosed |
||||
} |
||||
|
||||
err := i.isAliasToSingleIndex() |
||||
if err != nil { |
||||
return nil, nil, err |
||||
} |
||||
|
||||
return i.indexes[0].Advanced() |
||||
} |
||||
|
||||
func (i *indexAliasImpl) Add(indexes ...Index) { |
||||
i.mutex.Lock() |
||||
defer i.mutex.Unlock() |
||||
|
||||
i.indexes = append(i.indexes, indexes...) |
||||
} |
||||
|
||||
func (i *indexAliasImpl) removeSingle(index Index) { |
||||
for pos, in := range i.indexes { |
||||
if in == index { |
||||
i.indexes = append(i.indexes[:pos], i.indexes[pos+1:]...) |
||||
break |
||||
} |
||||
} |
||||
} |
||||
|
||||
func (i *indexAliasImpl) Remove(indexes ...Index) { |
||||
i.mutex.Lock() |
||||
defer i.mutex.Unlock() |
||||
|
||||
for _, in := range indexes { |
||||
i.removeSingle(in) |
||||
} |
||||
} |
||||
|
||||
func (i *indexAliasImpl) Swap(in, out []Index) { |
||||
i.mutex.Lock() |
||||
defer i.mutex.Unlock() |
||||
|
||||
// add
|
||||
i.indexes = append(i.indexes, in...) |
||||
|
||||
// delete
|
||||
for _, ind := range out { |
||||
i.removeSingle(ind) |
||||
} |
||||
} |
||||
|
||||
// createChildSearchRequest creates a separate
|
||||
// request from the original
|
||||
// For now, avoid data race on req structure.
|
||||
// TODO disable highlight/field load on child
|
||||
// requests, and add code to do this only on
|
||||
// the actual final results.
|
||||
// Perhaps that part needs to be optional,
|
||||
// could be slower in remote usages.
|
||||
func createChildSearchRequest(req *SearchRequest) *SearchRequest { |
||||
rv := SearchRequest{ |
||||
Query: req.Query, |
||||
Size: req.Size + req.From, |
||||
From: 0, |
||||
Highlight: req.Highlight, |
||||
Fields: req.Fields, |
||||
Facets: req.Facets, |
||||
Explain: req.Explain, |
||||
Sort: req.Sort, |
||||
} |
||||
return &rv |
||||
} |
||||
|
||||
// asyncSearchResult carries the outcome of one child index search back to
// the merging loop in MultiSearch.
type asyncSearchResult struct {
	Name   string        // name of the index that produced this result
	Result *SearchResult // non-nil on success
	Err    error         // non-nil on failure
}
||||
|
||||
// MultiSearch executes a SearchRequest across multiple Index objects,
// then merges the results. The indexes must honor any ctx deadline.
// Child errors do not abort the search; they are collected into the
// returned result's Status, so the function itself always returns nil
// for the error.
func MultiSearch(ctx context.Context, req *SearchRequest, indexes ...Index) (*SearchResult, error) {

	searchStart := time.Now()
	// buffered to len(indexes) so no child goroutine blocks on send
	asyncResults := make(chan *asyncSearchResult, len(indexes))

	// run search on each index in separate go routine
	var waitGroup sync.WaitGroup

	var searchChildIndex = func(in Index, childReq *SearchRequest) {
		rv := asyncSearchResult{Name: in.Name()}
		rv.Result, rv.Err = in.SearchInContext(ctx, childReq)
		asyncResults <- &rv
		waitGroup.Done()
	}

	waitGroup.Add(len(indexes))
	for _, in := range indexes {
		go searchChildIndex(in, createChildSearchRequest(req))
	}

	// on another go routine, close after finished
	go func() {
		waitGroup.Wait()
		close(asyncResults)
	}()

	var sr *SearchResult
	indexErrors := make(map[string]error)

	// drain until the channel is closed; successes merge, failures are
	// remembered per index name
	for asr := range asyncResults {
		if asr.Err == nil {
			if sr == nil {
				// first result
				sr = asr.Result
			} else {
				// merge with previous
				sr.Merge(asr.Result)
			}
		} else {
			indexErrors[asr.Name] = asr.Err
		}
	}

	// merge just concatenated all the hits
	// now lets clean it up

	// handle case where no results were successful
	if sr == nil {
		sr = &SearchResult{
			Status: &SearchStatus{
				Errors: make(map[string]error),
			},
		}
	}

	// sort all hits with the requested order
	if len(req.Sort) > 0 {
		sorter := newMultiSearchHitSorter(req.Sort, sr.Hits)
		sort.Sort(sorter)
	}

	// now skip over the correct From (children searched from offset 0)
	if req.From > 0 && len(sr.Hits) > req.From {
		sr.Hits = sr.Hits[req.From:]
	} else if req.From > 0 {
		sr.Hits = search.DocumentMatchCollection{}
	}

	// now trim to the correct size
	if req.Size > 0 && len(sr.Hits) > req.Size {
		sr.Hits = sr.Hits[0:req.Size]
	}

	// fix up facets
	for name, fr := range req.Facets {
		sr.Facets.Fixup(name, fr.Size)
	}

	// fix up original request
	sr.Request = req
	searchDuration := time.Since(searchStart)
	sr.Took = searchDuration

	// fix up errors: failed children count against Total/Failed
	if len(indexErrors) > 0 {
		if sr.Status.Errors == nil {
			sr.Status.Errors = make(map[string]error)
		}
		for indexName, indexErr := range indexErrors {
			sr.Status.Errors[indexName] = indexErr
			sr.Status.Total++
			sr.Status.Failed++
		}
	}

	return sr, nil
}
||||
|
||||
func (i *indexAliasImpl) NewBatch() *Batch { |
||||
i.mutex.RLock() |
||||
defer i.mutex.RUnlock() |
||||
|
||||
if !i.open { |
||||
return nil |
||||
} |
||||
|
||||
err := i.isAliasToSingleIndex() |
||||
if err != nil { |
||||
return nil |
||||
} |
||||
|
||||
return i.indexes[0].NewBatch() |
||||
} |
||||
|
||||
func (i *indexAliasImpl) Name() string { |
||||
return i.name |
||||
} |
||||
|
||||
func (i *indexAliasImpl) SetName(name string) { |
||||
i.name = name |
||||
} |
||||
|
||||
// indexAliasImplFieldDict wraps a child field dictionary and ties its
// lifetime to the alias's read lock, which stays held until Close.
type indexAliasImplFieldDict struct {
	index *indexAliasImpl
	fieldDict index.FieldDict
}

// Next returns the next entry of the underlying field dictionary.
func (f *indexAliasImplFieldDict) Next() (*index.DictEntry, error) {
	return f.fieldDict.Next()
}

// Close closes the underlying field dictionary and releases the read lock
// that was acquired when the dictionary was opened (by the alias's
// FieldDict* methods, outside this view).
func (f *indexAliasImplFieldDict) Close() error {
	defer f.index.mutex.RUnlock()
	return f.fieldDict.Close()
}
||||
|
||||
type multiSearchHitSorter struct { |
||||
hits search.DocumentMatchCollection |
||||
sort search.SortOrder |
||||
cachedScoring []bool |
||||
cachedDesc []bool |
||||
} |
||||
|
||||
func newMultiSearchHitSorter(sort search.SortOrder, hits search.DocumentMatchCollection) *multiSearchHitSorter { |
||||
return &multiSearchHitSorter{ |
||||
sort: sort, |
||||
hits: hits, |
||||
cachedScoring: sort.CacheIsScore(), |
||||
cachedDesc: sort.CacheDescending(), |
||||
} |
||||
} |
||||
|
||||
func (m *multiSearchHitSorter) Len() int { return len(m.hits) } |
||||
func (m *multiSearchHitSorter) Swap(i, j int) { m.hits[i], m.hits[j] = m.hits[j], m.hits[i] } |
||||
func (m *multiSearchHitSorter) Less(i, j int) bool { |
||||
c := m.sort.Compare(m.cachedScoring, m.cachedDesc, m.hits[i], m.hits[j]) |
||||
return c < 0 |
||||
} |
@ -0,0 +1,729 @@ |
||||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package bleve |
||||
|
||||
import ( |
||||
"encoding/json" |
||||
"fmt" |
||||
"os" |
||||
"sync" |
||||
"sync/atomic" |
||||
"time" |
||||
|
||||
"golang.org/x/net/context" |
||||
|
||||
"github.com/blevesearch/bleve/document" |
||||
"github.com/blevesearch/bleve/index" |
||||
"github.com/blevesearch/bleve/index/store" |
||||
"github.com/blevesearch/bleve/index/upsidedown" |
||||
"github.com/blevesearch/bleve/mapping" |
||||
"github.com/blevesearch/bleve/registry" |
||||
"github.com/blevesearch/bleve/search" |
||||
"github.com/blevesearch/bleve/search/collector" |
||||
"github.com/blevesearch/bleve/search/facet" |
||||
"github.com/blevesearch/bleve/search/highlight" |
||||
) |
||||
|
||||
// indexImpl is the default Index implementation: an index.Index plus its
// mapping, metadata, statistics and an open/closed guard. The mutex
// protects the open flag; readers take RLock for the duration of an
// operation so Close cannot race with in-flight work.
type indexImpl struct {
	path string // on-disk location; empty for in-memory indexes
	name string // defaults to path; used as the stats registry key
	meta *indexMeta
	i index.Index
	m mapping.IndexMapping
	mutex sync.RWMutex
	open bool
	stats *IndexStat
}

// storePath is the subdirectory of an index that holds the KV store.
const storePath = "store"

// mappingInternalKey is the internal-storage key under which the JSON
// serialized IndexMapping is persisted.
var mappingInternalKey = []byte("_mapping")

// indexStorePath returns the KV store directory inside the index at path.
func indexStorePath(path string) string {
	return path + string(os.PathSeparator) + storePath
}
||||
|
||||
// newIndexUsing creates a new index at path using the given mapping,
// index type, and kvstore implementation/config. An empty path builds an
// in-memory index; an empty kvstore is an error. On success the mapping is
// persisted into internal storage and the index is registered as open.
func newIndexUsing(path string, mapping mapping.IndexMapping, indexType string, kvstore string, kvconfig map[string]interface{}) (*indexImpl, error) {
	// first validate the mapping
	err := mapping.Validate()
	if err != nil {
		return nil, err
	}

	if kvconfig == nil {
		kvconfig = map[string]interface{}{}
	}

	if kvstore == "" {
		return nil, fmt.Errorf("bleve not configured for file based indexing")
	}

	rv := indexImpl{
		path: path,
		name: path,
		m: mapping,
		meta: newIndexMeta(indexType, kvstore, kvconfig),
	}
	rv.stats = &IndexStat{i: &rv}
	// at this point there is hope that we can be successful, so save index meta
	if path != "" {
		err = rv.meta.Save(path)
		if err != nil {
			return nil, err
		}
		// creating (not opening): the store must not already exist
		kvconfig["create_if_missing"] = true
		kvconfig["error_if_exists"] = true
		kvconfig["path"] = indexStorePath(path)
	} else {
		// in-memory index
		kvconfig["path"] = ""
	}

	// open the index
	indexTypeConstructor := registry.IndexTypeConstructorByName(rv.meta.IndexType)
	if indexTypeConstructor == nil {
		return nil, ErrorUnknownIndexType
	}

	rv.i, err = indexTypeConstructor(rv.meta.Storage, kvconfig, Config.analysisQueue)
	if err != nil {
		return nil, err
	}
	err = rv.i.Open()
	if err != nil {
		// translate the index-layer sentinel to the bleve-level one
		if err == index.ErrorUnknownStorageType {
			return nil, ErrorUnknownStorageType
		}
		return nil, err
	}

	// now persist the mapping
	mappingBytes, err := json.Marshal(mapping)
	if err != nil {
		return nil, err
	}
	err = rv.i.SetInternal(mappingInternalKey, mappingBytes)
	if err != nil {
		return nil, err
	}

	// mark the index as open
	rv.mutex.Lock()
	defer rv.mutex.Unlock()
	rv.open = true
	indexStats.Register(&rv)
	return &rv, nil
}
||||
|
||||
// openIndexUsing opens an existing index at path, overlaying runtimeConfig
// onto the persisted store config, and loads the mapping saved in internal
// storage. If the stored mapping fails validation, the index is returned
// open and usable alongside the validation error.
func openIndexUsing(path string, runtimeConfig map[string]interface{}) (rv *indexImpl, err error) {
	rv = &indexImpl{
		path: path,
		name: path,
	}
	rv.stats = &IndexStat{i: rv}

	rv.meta, err = openIndexMeta(path)
	if err != nil {
		return nil, err
	}

	// backwards compatibility if index type is missing
	if rv.meta.IndexType == "" {
		rv.meta.IndexType = upsidedown.Name
	}

	storeConfig := rv.meta.Config
	if storeConfig == nil {
		storeConfig = map[string]interface{}{}
	}

	// opening (not creating): the store must already exist
	storeConfig["path"] = indexStorePath(path)
	storeConfig["create_if_missing"] = false
	storeConfig["error_if_exists"] = false
	// runtime overrides win over persisted config
	for rck, rcv := range runtimeConfig {
		storeConfig[rck] = rcv
	}

	// open the index
	indexTypeConstructor := registry.IndexTypeConstructorByName(rv.meta.IndexType)
	if indexTypeConstructor == nil {
		return nil, ErrorUnknownIndexType
	}

	rv.i, err = indexTypeConstructor(rv.meta.Storage, storeConfig, Config.analysisQueue)
	if err != nil {
		return nil, err
	}
	err = rv.i.Open()
	if err != nil {
		if err == index.ErrorUnknownStorageType {
			return nil, ErrorUnknownStorageType
		}
		return nil, err
	}

	// now load the mapping
	indexReader, err := rv.i.Reader()
	if err != nil {
		return nil, err
	}
	defer func() {
		// surface a reader close failure unless an earlier error wins
		if cerr := indexReader.Close(); cerr != nil && err == nil {
			err = cerr
		}
	}()

	mappingBytes, err := indexReader.GetInternal(mappingInternalKey)
	if err != nil {
		return nil, err
	}

	var im *mapping.IndexMappingImpl
	err = json.Unmarshal(mappingBytes, &im)
	if err != nil {
		return nil, fmt.Errorf("error parsing mapping JSON: %v\nmapping contents:\n%s", err, string(mappingBytes))
	}

	// mark the index as open
	rv.mutex.Lock()
	defer rv.mutex.Unlock()
	rv.open = true

	// validate the mapping
	err = im.Validate()
	if err != nil {
		// note even if the mapping is invalid
		// we still return an open usable index
		return rv, err
	}

	rv.m = im
	indexStats.Register(rv)
	return rv, err
}
||||
|
||||
// Advanced returns implementation internals
// necessary ONLY for advanced usage: the underlying index.Index and its
// KVStore.
func (i *indexImpl) Advanced() (index.Index, store.KVStore, error) {
	s, err := i.i.Advanced()
	if err != nil {
		return nil, nil, err
	}
	return i.i, s, nil
}

// Mapping returns the IndexMapping in use by this
// Index.
func (i *indexImpl) Mapping() mapping.IndexMapping {
	return i.m
}
||||
|
||||
// Index the object with the specified identifier.
|
||||
// The IndexMapping for this index will determine
|
||||
// how the object is indexed.
|
||||
func (i *indexImpl) Index(id string, data interface{}) (err error) { |
||||
if id == "" { |
||||
return ErrorEmptyID |
||||
} |
||||
|
||||
i.mutex.RLock() |
||||
defer i.mutex.RUnlock() |
||||
|
||||
if !i.open { |
||||
return ErrorIndexClosed |
||||
} |
||||
|
||||
doc := document.NewDocument(id) |
||||
err = i.m.MapDocument(doc, data) |
||||
if err != nil { |
||||
return |
||||
} |
||||
err = i.i.Update(doc) |
||||
return |
||||
} |
||||
|
||||
// Delete entries for the specified identifier from
|
||||
// the index.
|
||||
func (i *indexImpl) Delete(id string) (err error) { |
||||
if id == "" { |
||||
return ErrorEmptyID |
||||
} |
||||
|
||||
i.mutex.RLock() |
||||
defer i.mutex.RUnlock() |
||||
|
||||
if !i.open { |
||||
return ErrorIndexClosed |
||||
} |
||||
|
||||
err = i.i.Delete(id) |
||||
return |
||||
} |
||||
|
||||
// Batch executes multiple Index and Delete
|
||||
// operations at the same time. There are often
|
||||
// significant performance benefits when performing
|
||||
// operations in a batch.
|
||||
func (i *indexImpl) Batch(b *Batch) error { |
||||
i.mutex.RLock() |
||||
defer i.mutex.RUnlock() |
||||
|
||||
if !i.open { |
||||
return ErrorIndexClosed |
||||
} |
||||
|
||||
return i.i.Batch(b.internal) |
||||
} |
||||
|
||||
// Document is used to find the values of all the
|
||||
// stored fields for a document in the index. These
|
||||
// stored fields are put back into a Document object
|
||||
// and returned.
|
||||
func (i *indexImpl) Document(id string) (doc *document.Document, err error) { |
||||
i.mutex.RLock() |
||||
defer i.mutex.RUnlock() |
||||
|
||||
if !i.open { |
||||
return nil, ErrorIndexClosed |
||||
} |
||||
indexReader, err := i.i.Reader() |
||||
if err != nil { |
||||
return nil, err |
||||
} |
||||
defer func() { |
||||
if cerr := indexReader.Close(); err == nil && cerr != nil { |
||||
err = cerr |
||||
} |
||||
}() |
||||
|
||||
doc, err = indexReader.Document(id) |
||||
if err != nil { |
||||
return nil, err |
||||
} |
||||
return doc, nil |
||||
} |
||||
|
||||
// DocCount returns the number of documents in the index.
func (i *indexImpl) DocCount() (count uint64, err error) {
	i.mutex.RLock()
	defer i.mutex.RUnlock()

	if !i.open {
		return 0, ErrorIndexClosed
	}

	// open a reader so we count against a consistent snapshot
	indexReader, rerr := i.i.Reader()
	if rerr != nil {
		return 0, fmt.Errorf("error opening index reader %v", rerr)
	}
	defer func() {
		cerr := indexReader.Close()
		if err == nil {
			err = cerr
		}
	}()

	count, err = indexReader.DocCount()
	return count, err
}
||||
|
||||
// Search executes a search request operation.
// Returns a SearchResult object or an error.
// It is equivalent to SearchInContext with a background (non-cancelable)
// context.
func (i *indexImpl) Search(req *SearchRequest) (sr *SearchResult, err error) {
	return i.SearchInContext(context.Background(), req)
}
||||
|
||||
// SearchInContext executes a search request operation within the provided
// Context. Returns a SearchResult object or an error. The read lock is
// held for the entire search so Close cannot race with it.
func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr *SearchResult, err error) {
	i.mutex.RLock()
	defer i.mutex.RUnlock()

	searchStart := time.Now()

	if !i.open {
		return nil, ErrorIndexClosed
	}

	collector := collector.NewTopNCollector(req.Size, req.From, req.Sort)

	// open a reader for this search
	indexReader, err := i.i.Reader()
	if err != nil {
		return nil, fmt.Errorf("error opening index reader %v", err)
	}
	defer func() {
		if cerr := indexReader.Close(); err == nil && cerr != nil {
			err = cerr
		}
	}()

	searcher, err := req.Query.Searcher(indexReader, i.m, req.Explain)
	if err != nil {
		return nil, err
	}
	defer func() {
		if serr := searcher.Close(); err == nil && serr != nil {
			err = serr
		}
	}()

	// wire up one facet builder per requested facet
	if req.Facets != nil {
		facetsBuilder := search.NewFacetsBuilder(indexReader)
		for facetName, facetRequest := range req.Facets {
			if facetRequest.NumericRanges != nil {
				// build numeric range facet
				facetBuilder := facet.NewNumericFacetBuilder(facetRequest.Field, facetRequest.Size)
				for _, nr := range facetRequest.NumericRanges {
					facetBuilder.AddRange(nr.Name, nr.Min, nr.Max)
				}
				facetsBuilder.Add(facetName, facetBuilder)
			} else if facetRequest.DateTimeRanges != nil {
				// build date range facet
				facetBuilder := facet.NewDateTimeFacetBuilder(facetRequest.Field, facetRequest.Size)
				dateTimeParser := i.m.DateTimeParserNamed("")
				for _, dr := range facetRequest.DateTimeRanges {
					start, end := dr.ParseDates(dateTimeParser)
					facetBuilder.AddRange(dr.Name, start, end)
				}
				facetsBuilder.Add(facetName, facetBuilder)
			} else {
				// build terms facet
				facetBuilder := facet.NewTermsFacetBuilder(facetRequest.Field, facetRequest.Size)
				facetsBuilder.Add(facetName, facetBuilder)
			}
		}
		collector.SetFacetsBuilder(facetsBuilder)
	}

	err = collector.Collect(ctx, searcher, indexReader)
	if err != nil {
		return nil, err
	}

	hits := collector.Results()

	var highlighter highlight.Highlighter

	if req.Highlight != nil {
		// get the right highlighter
		highlighter, err = Config.Cache.HighlighterNamed(Config.DefaultHighlighter)
		if err != nil {
			return nil, err
		}
		if req.Highlight.Style != nil {
			highlighter, err = Config.Cache.HighlighterNamed(*req.Highlight.Style)
			if err != nil {
				return nil, err
			}
		}
		if highlighter == nil {
			// NOTE(review): req.Highlight.Style may be nil here (only the
			// default highlighter was looked up) — this deref could panic;
			// verify HighlighterNamed never returns (nil, nil) for the default.
			return nil, fmt.Errorf("no highlighter named `%s` registered", *req.Highlight.Style)
		}
	}

	// load stored fields and/or highlight each hit, as requested
	for _, hit := range hits {
		if len(req.Fields) > 0 || highlighter != nil {
			doc, err := indexReader.Document(hit.ID)
			if err == nil && doc != nil {
				if len(req.Fields) > 0 {
					for _, f := range req.Fields {
						for _, docF := range doc.Fields {
							// "*" selects every stored field
							if f == "*" || docF.Name() == f {
								var value interface{}
								switch docF := docF.(type) {
								case *document.TextField:
									value = string(docF.Value())
								case *document.NumericField:
									num, err := docF.Number()
									if err == nil {
										value = num
									}
								case *document.DateTimeField:
									datetime, err := docF.DateTime()
									if err == nil {
										value = datetime.Format(time.RFC3339)
									}
								case *document.BooleanField:
									boolean, err := docF.Boolean()
									if err == nil {
										value = boolean
									}
								}
								if value != nil {
									hit.AddFieldValue(docF.Name(), value)
								}
							}
						}
					}
				}
				if highlighter != nil {
					highlightFields := req.Highlight.Fields
					if highlightFields == nil {
						// add all fields with matches
						highlightFields = make([]string, 0, len(hit.Locations))
						for k := range hit.Locations {
							highlightFields = append(highlightFields, k)
						}
					}
					for _, hf := range highlightFields {
						highlighter.BestFragmentsInField(hit, doc, hf, 1)
					}
				}
			} else if doc == nil {
				// unexpected case, a doc ID that was found as a search hit
				// was unable to be found during document lookup
				return nil, ErrorIndexReadInconsistency
			}
		}
		if i.name != "" {
			hit.Index = i.name
		}
	}

	// record stats and optionally log slow searches
	atomic.AddUint64(&i.stats.searches, 1)
	searchDuration := time.Since(searchStart)
	atomic.AddUint64(&i.stats.searchTime, uint64(searchDuration))

	if Config.SlowSearchLogThreshold > 0 &&
		searchDuration > Config.SlowSearchLogThreshold {
		logger.Printf("slow search took %s - %v", searchDuration, req)
	}

	return &SearchResult{
		Status: &SearchStatus{
			Total: 1,
			Failed: 0,
			Successful: 1,
			Errors: make(map[string]error),
		},
		Request: req,
		Hits: hits,
		Total: collector.Total(),
		MaxScore: collector.MaxScore(),
		Took: searchDuration,
		Facets: collector.FacetResults(),
	}, nil
}
||||
|
||||
// Fields returns the name of all the fields this
|
||||
// Index has operated on.
|
||||
func (i *indexImpl) Fields() (fields []string, err error) { |
||||
i.mutex.RLock() |
||||
defer i.mutex.RUnlock() |
||||
|
||||
if !i.open { |
||||
return nil, ErrorIndexClosed |
||||
} |
||||
|
||||
indexReader, err := i.i.Reader() |
||||
if err != nil { |
||||
return nil, err |
||||
} |
||||
defer func() { |
||||
if cerr := indexReader.Close(); err == nil && cerr != nil { |
||||
err = cerr |
||||
} |
||||
}() |
||||
|
||||
fields, err = indexReader.Fields() |
||||
if err != nil { |
||||
return nil, err |
||||
} |
||||
return fields, nil |
||||
} |
||||
|
||||
func (i *indexImpl) FieldDict(field string) (index.FieldDict, error) { |
||||
i.mutex.RLock() |
||||
|
||||
if !i.open { |
||||
i.mutex.RUnlock() |
||||
return nil, ErrorIndexClosed |
||||
} |
||||
|
||||
indexReader, err := i.i.Reader() |
||||
if err != nil { |
||||
i.mutex.RUnlock() |
||||
return nil, err |
||||
} |
||||
|
||||
fieldDict, err := indexReader.FieldDict(field) |
||||
if err != nil { |
||||
i.mutex.RUnlock() |
||||
return nil, err |
||||
} |
||||
|
||||
return &indexImplFieldDict{ |
||||
index: i, |
||||
indexReader: indexReader, |
||||
fieldDict: fieldDict, |
||||
}, nil |
||||
} |
||||
|
||||
func (i *indexImpl) FieldDictRange(field string, startTerm []byte, endTerm []byte) (index.FieldDict, error) { |
||||
i.mutex.RLock() |
||||
|
||||
if !i.open { |
||||
i.mutex.RUnlock() |
||||
return nil, ErrorIndexClosed |
||||
} |
||||
|
||||
indexReader, err := i.i.Reader() |
||||
if err != nil { |
||||
i.mutex.RUnlock() |
||||
return nil, err |
||||
} |
||||
|
||||
fieldDict, err := indexReader.FieldDictRange(field, startTerm, endTerm) |
||||
if err != nil { |
||||
i.mutex.RUnlock() |
||||
return nil, err |
||||
} |
||||
|
||||
return &indexImplFieldDict{ |
||||
index: i, |
||||
indexReader: indexReader, |
||||
fieldDict: fieldDict, |
||||
}, nil |
||||
} |
||||
|
||||
func (i *indexImpl) FieldDictPrefix(field string, termPrefix []byte) (index.FieldDict, error) { |
||||
i.mutex.RLock() |
||||
|
||||
if !i.open { |
||||
i.mutex.RUnlock() |
||||
return nil, ErrorIndexClosed |
||||
} |
||||
|
||||
indexReader, err := i.i.Reader() |
||||
if err != nil { |
||||
i.mutex.RUnlock() |
||||
return nil, err |
||||
} |
||||
|
||||
fieldDict, err := indexReader.FieldDictPrefix(field, termPrefix) |
||||
if err != nil { |
||||
i.mutex.RUnlock() |
||||
return nil, err |
||||
} |
||||
|
||||
return &indexImplFieldDict{ |
||||
index: i, |
||||
indexReader: indexReader, |
||||
fieldDict: fieldDict, |
||||
}, nil |
||||
} |
||||
|
||||
// Close marks the index closed (under the write lock, so it waits for all
// in-flight readers), unregisters its stats, and closes the underlying
// index. NOTE(review): there is no guard against a second Close call —
// confirm callers close at most once.
func (i *indexImpl) Close() error {
	i.mutex.Lock()
	defer i.mutex.Unlock()

	indexStats.UnRegister(i)

	i.open = false
	return i.i.Close()
}

// Stats returns this index's statistics holder.
func (i *indexImpl) Stats() *IndexStat {
	return i.stats
}

// StatsMap returns this index's statistics as a plain map.
func (i *indexImpl) StatsMap() map[string]interface{} {
	return i.stats.statsMap()
}
||||
|
||||
func (i *indexImpl) GetInternal(key []byte) (val []byte, err error) { |
||||
i.mutex.RLock() |
||||
defer i.mutex.RUnlock() |
||||
|
||||
if !i.open { |
||||
return nil, ErrorIndexClosed |
||||
} |
||||
|
||||
reader, err := i.i.Reader() |
||||
if err != nil { |
||||
return nil, err |
||||
} |
||||
defer func() { |
||||
if cerr := reader.Close(); err == nil && cerr != nil { |
||||
err = cerr |
||||
} |
||||
}() |
||||
|
||||
val, err = reader.GetInternal(key) |
||||
if err != nil { |
||||
return nil, err |
||||
} |
||||
return val, nil |
||||
} |
||||
|
||||
func (i *indexImpl) SetInternal(key, val []byte) error { |
||||
i.mutex.RLock() |
||||
defer i.mutex.RUnlock() |
||||
|
||||
if !i.open { |
||||
return ErrorIndexClosed |
||||
} |
||||
|
||||
return i.i.SetInternal(key, val) |
||||
} |
||||
|
||||
func (i *indexImpl) DeleteInternal(key []byte) error { |
||||
i.mutex.RLock() |
||||
defer i.mutex.RUnlock() |
||||
|
||||
if !i.open { |
||||
return ErrorIndexClosed |
||||
} |
||||
|
||||
return i.i.DeleteInternal(key) |
||||
} |
||||
|
||||
// NewBatch creates a new empty batch.
func (i *indexImpl) NewBatch() *Batch {
	return &Batch{
		index: i,
		internal: index.NewBatch(),
	}
}

// Name returns the name of this index (defaults to its path).
func (i *indexImpl) Name() string {
	return i.name
}

// SetName renames the index and re-registers it in the stats registry
// under the new name. NOTE(review): i.name is written without holding
// i.mutex while readers (e.g. searches) read it under RLock — confirm
// SetName is only called before the index is shared.
func (i *indexImpl) SetName(name string) {
	indexStats.UnRegister(i)
	i.name = name
	indexStats.Register(i)
}
||||
|
||||
// indexImplFieldDict bundles a field dictionary with the reader it came
// from; the index's read lock stays held until Close.
type indexImplFieldDict struct {
	index *indexImpl
	indexReader index.IndexReader
	fieldDict index.FieldDict
}

// Next returns the next entry of the underlying field dictionary.
func (f *indexImplFieldDict) Next() (*index.DictEntry, error) {
	return f.fieldDict.Next()
}

// Close closes the dictionary, then its reader, and releases the read
// lock acquired by the FieldDict* method that created this value. The
// lock is released even if closing fails.
func (f *indexImplFieldDict) Close() error {
	defer f.index.mutex.RUnlock()
	err := f.fieldDict.Close()
	if err != nil {
		return err
	}
	return f.indexReader.Close()
}
@ -0,0 +1,96 @@ |
||||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package bleve |
||||
|
||||
import ( |
||||
"encoding/json" |
||||
"io/ioutil" |
||||
"os" |
||||
|
||||
"github.com/blevesearch/bleve/index/upsidedown" |
||||
) |
||||
|
||||
// metaFilename is the name of the metadata file inside an index directory.
const metaFilename = "index_meta.json"

// indexMeta is the JSON document persisted alongside an index describing
// which storage and index type it was created with.
type indexMeta struct {
	Storage string `json:"storage"`
	IndexType string `json:"index_type"`
	Config map[string]interface{} `json:"config,omitempty"`
}

// newIndexMeta builds metadata for a new index with the given index type,
// storage name, and store config.
func newIndexMeta(indexType string, storage string, config map[string]interface{}) *indexMeta {
	return &indexMeta{
		IndexType: indexType,
		Storage: storage,
		Config: config,
	}
}
||||
|
||||
func openIndexMeta(path string) (*indexMeta, error) { |
||||
if _, err := os.Stat(path); os.IsNotExist(err) { |
||||
return nil, ErrorIndexPathDoesNotExist |
||||
} |
||||
indexMetaPath := indexMetaPath(path) |
||||
metaBytes, err := ioutil.ReadFile(indexMetaPath) |
||||
if err != nil { |
||||
return nil, ErrorIndexMetaMissing |
||||
} |
||||
var im indexMeta |
||||
err = json.Unmarshal(metaBytes, &im) |
||||
if err != nil { |
||||
return nil, ErrorIndexMetaCorrupt |
||||
} |
||||
if im.IndexType == "" { |
||||
im.IndexType = upsidedown.Name |
||||
} |
||||
return &im, nil |
||||
} |
||||
|
||||
// Save writes the metadata as JSON into a new file inside path, creating
// the directory if needed. O_EXCL makes creation fail if the file already
// exists, which is reported as ErrorIndexPathExists.
func (i *indexMeta) Save(path string) (err error) {
	indexMetaPath := indexMetaPath(path)
	// ensure any necessary parent directories exist
	err = os.MkdirAll(path, 0700)
	if err != nil {
		if os.IsExist(err) {
			return ErrorIndexPathExists
		}
		return err
	}
	metaBytes, err := json.Marshal(i)
	if err != nil {
		return err
	}
	// O_EXCL: never overwrite an existing index's metadata
	indexMetaFile, err := os.OpenFile(indexMetaPath, os.O_RDWR|os.O_CREATE|os.O_EXCL, 0666)
	if err != nil {
		if os.IsExist(err) {
			return ErrorIndexPathExists
		}
		return err
	}
	defer func() {
		// a close failure is reported unless an earlier error wins
		if ierr := indexMetaFile.Close(); err == nil && ierr != nil {
			err = ierr
		}
	}()
	_, err = indexMetaFile.Write(metaBytes)
	if err != nil {
		return err
	}
	return nil
}
||||
|
||||
// indexMetaPath returns the location of the metadata file inside the
// index directory at path.
func indexMetaPath(path string) string {
	sep := string(os.PathSeparator)
	return path + sep + metaFilename
}
@ -0,0 +1,75 @@ |
||||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package bleve |
||||
|
||||
import ( |
||||
"encoding/json" |
||||
"sync" |
||||
"sync/atomic" |
||||
) |
||||
|
||||
type IndexStat struct { |
||||
searches uint64 |
||||
searchTime uint64 |
||||
i *indexImpl |
||||
} |
||||
|
||||
func (is *IndexStat) statsMap() map[string]interface{} { |
||||
m := map[string]interface{}{} |
||||
m["index"] = is.i.i.StatsMap() |
||||
m["searches"] = atomic.LoadUint64(&is.searches) |
||||
m["search_time"] = atomic.LoadUint64(&is.searchTime) |
||||
return m |
||||
} |
||||
|
||||
func (is *IndexStat) MarshalJSON() ([]byte, error) { |
||||
m := is.statsMap() |
||||
return json.Marshal(m) |
||||
} |
||||
|
||||
// IndexStats is a mutex-guarded registry of per-index statistics, keyed
// by index name.
type IndexStats struct {
	indexes map[string]*IndexStat
	mutex sync.RWMutex
}

// NewIndexStats creates an empty stats registry.
func NewIndexStats() *IndexStats {
	return &IndexStats{
		indexes: make(map[string]*IndexStat),
	}
}
||||
|
||||
// Register adds (or replaces) the stats entry for index, keyed by its name.
func (i *IndexStats) Register(index Index) {
	i.mutex.Lock()
	defer i.mutex.Unlock()

	i.indexes[index.Name()] = index.Stats()
}

// UnRegister drops the stats entry for index.
func (i *IndexStats) UnRegister(index Index) {
	i.mutex.Lock()
	defer i.mutex.Unlock()

	delete(i.indexes, index.Name())
}

// String renders all registered index stats as JSON.
func (i *IndexStats) String() string {
	i.mutex.RLock()
	defer i.mutex.RUnlock()

	if marshaled, err := json.Marshal(i.indexes); err == nil {
		return string(marshaled)
	}
	return "error marshaling stats"
}
||||
|
||||
// indexStats is the process-wide registry of per-index statistics,
// populated by newIndexUsing/openIndexUsing and emptied by Close.
var indexStats *IndexStats
@ -0,0 +1,61 @@ |
||||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package bleve |
||||
|
||||
import "github.com/blevesearch/bleve/mapping" |
||||
|
||||
// NewIndexMapping creates a new IndexMapping that will use all the default indexing rules
|
||||
func NewIndexMapping() *mapping.IndexMappingImpl { |
||||
return mapping.NewIndexMapping() |
||||
} |
||||
|
||||
// NewDocumentMapping returns a new document mapping
|
||||
// with all the default values.
|
||||
func NewDocumentMapping() *mapping.DocumentMapping { |
||||
return mapping.NewDocumentMapping() |
||||
} |
||||
|
||||
// NewDocumentStaticMapping returns a new document
// mapping that will not automatically index parts
// of a document without an explicit mapping.
// Delegates to the mapping package's constructor.
func NewDocumentStaticMapping() *mapping.DocumentMapping {
	return mapping.NewDocumentStaticMapping()
}
||||
|
||||
// NewDocumentDisabledMapping returns a new document
// mapping that will not perform any indexing.
// Delegates to the mapping package's constructor.
func NewDocumentDisabledMapping() *mapping.DocumentMapping {
	return mapping.NewDocumentDisabledMapping()
}
||||
|
||||
// NewTextFieldMapping returns a default field mapping for text.
// Delegates to the mapping package's constructor.
func NewTextFieldMapping() *mapping.FieldMapping {
	return mapping.NewTextFieldMapping()
}
||||
|
||||
// NewNumericFieldMapping returns a default field mapping for numbers.
// Delegates to the mapping package's constructor.
func NewNumericFieldMapping() *mapping.FieldMapping {
	return mapping.NewNumericFieldMapping()
}
||||
|
||||
// NewDateTimeFieldMapping returns a default field mapping for dates.
// Delegates to the mapping package's constructor.
func NewDateTimeFieldMapping() *mapping.FieldMapping {
	return mapping.NewDateTimeFieldMapping()
}
||||
|
||||
// NewBooleanFieldMapping returns a default field mapping for booleans.
// Delegates to the mapping package's constructor.
func NewBooleanFieldMapping() *mapping.FieldMapping {
	return mapping.NewBooleanFieldMapping()
}
@ -0,0 +1,99 @@ |
||||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package mapping |
||||
|
||||
// customAnalysis holds user-defined analysis components. Each map is
// keyed by component name, and each value is that component's
// configuration, as reflected by the JSON tags below.
type customAnalysis struct {
	CharFilters     map[string]map[string]interface{} `json:"char_filters,omitempty"`
	Tokenizers      map[string]map[string]interface{} `json:"tokenizers,omitempty"`
	TokenMaps       map[string]map[string]interface{} `json:"token_maps,omitempty"`
	TokenFilters    map[string]map[string]interface{} `json:"token_filters,omitempty"`
	Analyzers       map[string]map[string]interface{} `json:"analyzers,omitempty"`
	DateTimeParsers map[string]map[string]interface{} `json:"date_time_parsers,omitempty"`
}
||||
|
||||
// registerAll defines every custom analysis component on the index
// mapping's registry cache. Tokenizers are registered with a retry
// loop so that a definition which fails on one pass (presumably
// because it references a not-yet-registered tokenizer — confirm
// against DefineTokenizer) can succeed on a later pass. All other
// component kinds are registered in a single pass, returning the
// first error encountered.
func (c *customAnalysis) registerAll(i *IndexMappingImpl) error {
	for name, config := range c.CharFilters {
		_, err := i.cache.DefineCharFilter(name, config)
		if err != nil {
			return err
		}
	}

	if len(c.Tokenizers) > 0 {
		// put all the names in map tracking work to do
		todo := map[string]struct{}{}
		for name := range c.Tokenizers {
			todo[name] = struct{}{}
		}
		// non-zero sentinel so the loop below runs at least once
		registered := 1
		errs := []error{}
		// as long as we keep making progress, keep going
		for len(todo) > 0 && registered > 0 {
			registered = 0
			errs = []error{}
			for name := range todo {
				config := c.Tokenizers[name]
				_, err := i.cache.DefineTokenizer(name, config)
				if err != nil {
					errs = append(errs, err)
				} else {
					// deleting while ranging over a map is legal in Go
					delete(todo, name)
					registered++
				}
			}
		}

		if len(errs) > 0 {
			// no further progress possible; surface only the first
			// failure of the final pass
			return errs[0]
		}
	}
	for name, config := range c.TokenMaps {
		_, err := i.cache.DefineTokenMap(name, config)
		if err != nil {
			return err
		}
	}
	for name, config := range c.TokenFilters {
		_, err := i.cache.DefineTokenFilter(name, config)
		if err != nil {
			return err
		}
	}
	for name, config := range c.Analyzers {
		_, err := i.cache.DefineAnalyzer(name, config)
		if err != nil {
			return err
		}
	}
	for name, config := range c.DateTimeParsers {
		_, err := i.cache.DefineDateTimeParser(name, config)
		if err != nil {
			return err
		}
	}
	return nil
}
||||
|
||||
func newCustomAnalysis() *customAnalysis { |
||||
rv := customAnalysis{ |
||||
CharFilters: make(map[string]map[string]interface{}), |
||||
Tokenizers: make(map[string]map[string]interface{}), |
||||
TokenMaps: make(map[string]map[string]interface{}), |
||||
TokenFilters: make(map[string]map[string]interface{}), |
||||
Analyzers: make(map[string]map[string]interface{}), |
||||
DateTimeParsers: make(map[string]map[string]interface{}), |
||||
} |
||||
return &rv |
||||
} |
@ -0,0 +1,490 @@ |
||||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package mapping |
||||
|
||||
import ( |
||||
"encoding/json" |
||||
"fmt" |
||||
"reflect" |
||||
"time" |
||||
|
||||
"github.com/blevesearch/bleve/registry" |
||||
) |
||||
|
||||
// A DocumentMapping describes how a type of document
|
||||
// should be indexed.
|
||||
// As documents can be hierarchical, named sub-sections
|
||||
// of documents are mapped using the same structure in
|
||||
// the Properties field.
|
||||
// Each value inside a document can be indexed 0 or more
|
||||
// ways. These index entries are called fields and
|
||||
// are stored in the Fields field.
|
||||
// Entire sections of a document can be ignored or
|
||||
// excluded by setting Enabled to false.
|
||||
// If not explicitly mapped, default mapping operations
|
||||
// are used. To disable this automatic handling, set
|
||||
// Dynamic to false.
|
||||
type DocumentMapping struct {
	// Enabled, when false, excludes this entire section of the document from indexing.
	Enabled bool `json:"enabled"`
	// Dynamic, when true, lets values without an explicit mapping be indexed automatically.
	Dynamic bool `json:"dynamic"`
	// Properties maps named sub-sections of the document to their own mappings.
	Properties map[string]*DocumentMapping `json:"properties,omitempty"`
	// Fields lists the index entries for the value at this level.
	Fields []*FieldMapping `json:"fields,omitempty"`
	// DefaultAnalyzer is used by fields below this level that do not name their own analyzer.
	DefaultAnalyzer string `json:"default_analyzer"`

	// StructTagKey overrides "json" when looking for field names in struct tags
	StructTagKey string `json:"struct_tag_key,omitempty"`
}
||||
|
||||
// Validate checks that every analyzer, date-time parser, and field
// type referenced by this mapping — and, recursively, by all of its
// sub-mappings — can be resolved against the given registry cache.
// It returns the first problem found, or nil.
func (dm *DocumentMapping) Validate(cache *registry.Cache) error {
	var err error
	if dm.DefaultAnalyzer != "" {
		// inner err deliberately shadows the outer one; only the
		// error result of the lookup matters here
		_, err := cache.AnalyzerNamed(dm.DefaultAnalyzer)
		if err != nil {
			return err
		}
	}
	for _, property := range dm.Properties {
		err = property.Validate(cache)
		if err != nil {
			return err
		}
	}
	for _, field := range dm.Fields {
		if field.Analyzer != "" {
			_, err = cache.AnalyzerNamed(field.Analyzer)
			if err != nil {
				return err
			}
		}
		if field.DateFormat != "" {
			_, err = cache.DateTimeParserNamed(field.DateFormat)
			if err != nil {
				return err
			}
		}
		// only these four field types are accepted
		switch field.Type {
		case "text", "datetime", "number", "boolean":
		default:
			return fmt.Errorf("unknown field type: '%s'", field.Type)
		}
	}
	return nil
}
||||
|
||||
// analyzerNameForPath attempts to first find the field
|
||||
// described by this path, then returns the analyzer
|
||||
// configured for that field
|
||||
func (dm *DocumentMapping) analyzerNameForPath(path string) string { |
||||
field := dm.fieldDescribedByPath(path) |
||||
if field != nil { |
||||
return field.Analyzer |
||||
} |
||||
return "" |
||||
} |
||||
|
||||
// fieldDescribedByPath resolves an encoded path to the FieldMapping
// describing it, or nil when nothing matches. With one remaining path
// element it checks matching properties first, then properties whose
// field Name overrides the property name.
// NOTE(review): pathElements[0] is accessed unconditionally, so this
// assumes decodePath never returns an empty slice — confirm with callers.
func (dm *DocumentMapping) fieldDescribedByPath(path string) *FieldMapping {
	pathElements := decodePath(path)
	if len(pathElements) > 1 {
		// easy case, there is more than 1 path element remaining
		// the next path element must match a property name
		// at this level
		for propName, subDocMapping := range dm.Properties {
			if propName == pathElements[0] {
				// recurse with the remainder of the path
				return subDocMapping.fieldDescribedByPath(encodePath(pathElements[1:]))
			}
		}
	} else {
		// just 1 path element
		// first look for property name with empty field
		for propName, subDocMapping := range dm.Properties {
			if propName == pathElements[0] {
				// found property name match, now look at its fields
				for _, field := range subDocMapping.Fields {
					if field.Name == "" || field.Name == pathElements[0] {
						// match
						return field
					}
				}
			}
		}
		// next, walk the properties again, looking for field overriding the name
		for propName, subDocMapping := range dm.Properties {
			if propName != pathElements[0] {
				// property name isn't a match, but field name could override it
				for _, field := range subDocMapping.Fields {
					if field.Name == pathElements[0] {
						return field
					}
				}
			}
		}
	}

	return nil
}
||||
|
||||
// documentMappingForPath only returns EXACT matches for a sub document
// or for an explicitly mapped field, if you want to find the
// closest document mapping to a field not explicitly mapped
// use closestDocMapping.
func (dm *DocumentMapping) documentMappingForPath(path string) *DocumentMapping {
	pathElements := decodePath(path)
	current := dm
OUTER:
	for i, pathElement := range pathElements {
		for name, subDocMapping := range current.Properties {
			if name == pathElement {
				current = subDocMapping
				continue OUTER
			}
		}
		// no subDocMapping matches this pathElement
		// only if this is the last element check for field name
		if i == len(pathElements)-1 {
			for _, field := range current.Fields {
				if field.Name == pathElement {
					// NOTE(review): this break exits only the inner field
					// loop and the function still falls through to
					// "return nil" below, so a field-name match has no
					// observable effect — confirm the intended behavior.
					break
				}
			}
		}

		return nil
	}
	return current
}
||||
|
||||
// closestDocMapping findest the most specific document mapping that matches
|
||||
// part of the provided path
|
||||
func (dm *DocumentMapping) closestDocMapping(path string) *DocumentMapping { |
||||
pathElements := decodePath(path) |
||||
current := dm |
||||
OUTER: |
||||
for _, pathElement := range pathElements { |
||||
for name, subDocMapping := range current.Properties { |
||||
if name == pathElement { |
||||
current = subDocMapping |
||||
continue OUTER |
||||
} |
||||
} |
||||
} |
||||
return current |
||||
} |
||||
|
||||
// NewDocumentMapping returns a new document mapping
|
||||
// with all the default values.
|
||||
func NewDocumentMapping() *DocumentMapping { |
||||
return &DocumentMapping{ |
||||
Enabled: true, |
||||
Dynamic: true, |
||||
} |
||||
} |
||||
|
||||
// NewDocumentStaticMapping returns a new document
|
||||
// mapping that will not automatically index parts
|
||||
// of a document without an explicit mapping.
|
||||
func NewDocumentStaticMapping() *DocumentMapping { |
||||
return &DocumentMapping{ |
||||
Enabled: true, |
||||
} |
||||
} |
||||
|
||||
// NewDocumentDisabledMapping returns a new document
|
||||
// mapping that will not perform any indexing.
|
||||
func NewDocumentDisabledMapping() *DocumentMapping { |
||||
return &DocumentMapping{} |
||||
} |
||||
|
||||
// AddSubDocumentMapping adds the provided DocumentMapping as a sub-mapping
|
||||
// for the specified named subsection.
|
||||
func (dm *DocumentMapping) AddSubDocumentMapping(property string, sdm *DocumentMapping) { |
||||
if dm.Properties == nil { |
||||
dm.Properties = make(map[string]*DocumentMapping) |
||||
} |
||||
dm.Properties[property] = sdm |
||||
} |
||||
|
||||
// AddFieldMappingsAt adds one or more FieldMappings
|
||||
// at the named sub-document. If the named sub-document
|
||||
// doesn't yet exist it is created for you.
|
||||
// This is a convenience function to make most common
|
||||
// mappings more concise.
|
||||
// Otherwise, you would:
|
||||
// subMapping := NewDocumentMapping()
|
||||
// subMapping.AddFieldMapping(fieldMapping)
|
||||
// parentMapping.AddSubDocumentMapping(property, subMapping)
|
||||
func (dm *DocumentMapping) AddFieldMappingsAt(property string, fms ...*FieldMapping) { |
||||
if dm.Properties == nil { |
||||
dm.Properties = make(map[string]*DocumentMapping) |
||||
} |
||||
sdm, ok := dm.Properties[property] |
||||
if !ok { |
||||
sdm = NewDocumentMapping() |
||||
} |
||||
for _, fm := range fms { |
||||
sdm.AddFieldMapping(fm) |
||||
} |
||||
dm.Properties[property] = sdm |
||||
} |
||||
|
||||
// AddFieldMapping adds the provided FieldMapping for this section
|
||||
// of the document.
|
||||
func (dm *DocumentMapping) AddFieldMapping(fm *FieldMapping) { |
||||
if dm.Fields == nil { |
||||
dm.Fields = make([]*FieldMapping, 0) |
||||
} |
||||
dm.Fields = append(dm.Fields, fm) |
||||
} |
||||
|
||||
// UnmarshalJSON offers custom unmarshaling with optional strict
// validation: when MappingJSONStrict is true, unknown keys cause an
// error instead of being ignored. Enabled and Dynamic are defaulted
// to true BEFORE decoding, so keys omitted from the JSON leave the
// mapping enabled and dynamic (the plain zero value would be false).
func (dm *DocumentMapping) UnmarshalJSON(data []byte) error {

	// decode into raw messages first so each key can be handled
	// individually and unknown keys can be collected
	var tmp map[string]json.RawMessage
	err := json.Unmarshal(data, &tmp)
	if err != nil {
		return err
	}

	// set defaults for fields which might have been omitted
	dm.Enabled = true
	dm.Dynamic = true

	var invalidKeys []string
	for k, v := range tmp {
		switch k {
		case "enabled":
			err := json.Unmarshal(v, &dm.Enabled)
			if err != nil {
				return err
			}
		case "dynamic":
			err := json.Unmarshal(v, &dm.Dynamic)
			if err != nil {
				return err
			}
		case "default_analyzer":
			err := json.Unmarshal(v, &dm.DefaultAnalyzer)
			if err != nil {
				return err
			}
		case "properties":
			err := json.Unmarshal(v, &dm.Properties)
			if err != nil {
				return err
			}
		case "fields":
			err := json.Unmarshal(v, &dm.Fields)
			if err != nil {
				return err
			}
		case "struct_tag_key":
			err := json.Unmarshal(v, &dm.StructTagKey)
			if err != nil {
				return err
			}
		default:
			invalidKeys = append(invalidKeys, k)
		}
	}

	if MappingJSONStrict && len(invalidKeys) > 0 {
		return fmt.Errorf("document mapping contains invalid keys: %v", invalidKeys)
	}

	return nil
}
||||
|
||||
func (dm *DocumentMapping) defaultAnalyzerName(path []string) string { |
||||
rv := "" |
||||
current := dm |
||||
for _, pathElement := range path { |
||||
var ok bool |
||||
current, ok = current.Properties[pathElement] |
||||
if !ok { |
||||
break |
||||
} |
||||
if current.DefaultAnalyzer != "" { |
||||
rv = current.DefaultAnalyzer |
||||
} |
||||
} |
||||
return rv |
||||
} |
||||
|
||||
// walkDocument reflects over an arbitrary Go value and dispatches
// each leaf value to processProperty. Maps (string-keyed only) and
// struct fields extend the path; slices/arrays extend the array
// indexes instead; pointers are dereferenced; signed, unsigned, and
// float numerics are all normalized to float64.
func (dm *DocumentMapping) walkDocument(data interface{}, path []string, indexes []uint64, context *walkContext) {
	// allow default "json" tag to be overridden
	structTagKey := dm.StructTagKey
	if structTagKey == "" {
		structTagKey = "json"
	}

	val := reflect.ValueOf(data)
	typ := val.Type()
	switch typ.Kind() {
	case reflect.Map:
		// FIXME can add support for other map keys in the future
		if typ.Key().Kind() == reflect.String {
			for _, key := range val.MapKeys() {
				fieldName := key.String()
				fieldVal := val.MapIndex(key).Interface()
				dm.processProperty(fieldVal, append(path, fieldName), indexes, context)
			}
		}
	case reflect.Struct:
		for i := 0; i < val.NumField(); i++ {
			field := typ.Field(i)
			fieldName := field.Name
			// anonymous fields of type struct can elide the type name
			if field.Anonymous && field.Type.Kind() == reflect.Struct {
				fieldName = ""
			}

			// if the field has a name under the specified tag, prefer that
			tag := field.Tag.Get(structTagKey)
			tagFieldName := parseTagName(tag)
			if tagFieldName == "-" {
				// tag explicitly excludes this field from indexing
				continue
			}
			// allow tag to set field name to empty, only if anonymous
			if field.Tag != "" && (tagFieldName != "" || field.Anonymous) {
				fieldName = tagFieldName
			}

			// unexported fields cannot be read via reflection; skip them
			if val.Field(i).CanInterface() {
				fieldVal := val.Field(i).Interface()
				newpath := path
				if fieldName != "" {
					newpath = append(path, fieldName)
				}
				dm.processProperty(fieldVal, newpath, indexes, context)
			}
		}
	case reflect.Slice, reflect.Array:
		for i := 0; i < val.Len(); i++ {
			if val.Index(i).CanInterface() {
				fieldVal := val.Index(i).Interface()
				// the element position is recorded in indexes, not path
				dm.processProperty(fieldVal, path, append(indexes, uint64(i)), context)
			}
		}
	case reflect.Ptr:
		ptrElem := val.Elem()
		if ptrElem.IsValid() && ptrElem.CanInterface() {
			dm.processProperty(ptrElem.Interface(), path, indexes, context)
		}
	case reflect.String:
		dm.processProperty(val.String(), path, indexes, context)
	case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
		dm.processProperty(float64(val.Int()), path, indexes, context)
	case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64:
		dm.processProperty(float64(val.Uint()), path, indexes, context)
	case reflect.Float32, reflect.Float64:
		dm.processProperty(float64(val.Float()), path, indexes, context)
	case reflect.Bool:
		dm.processProperty(val.Bool(), path, indexes, context)
	}

}
||||
|
||||
// processProperty indexes a single value found at the given path.
// An exact-match sub-mapping (documentMappingForPath) indexes via its
// explicit FieldMappings; otherwise, if the closest enclosing mapping
// is Dynamic, a dynamic field mapping appropriate to the value's kind
// is used. Disabled sub-mappings short-circuit. Note closestDocMapping
// never returns nil (it falls back to dm itself), so dereferencing
// closestDocMapping.Dynamic below is safe.
func (dm *DocumentMapping) processProperty(property interface{}, path []string, indexes []uint64, context *walkContext) {
	pathString := encodePath(path)
	// look to see if there is a mapping for this field
	subDocMapping := dm.documentMappingForPath(pathString)
	closestDocMapping := dm.closestDocMapping(pathString)

	// check to see if we even need to do further processing
	if subDocMapping != nil && !subDocMapping.Enabled {
		return
	}

	propertyValue := reflect.ValueOf(property)
	if !propertyValue.IsValid() {
		// cannot do anything with the zero value
		return
	}
	propertyType := propertyValue.Type()
	switch propertyType.Kind() {
	case reflect.String:
		propertyValueString := propertyValue.String()
		if subDocMapping != nil {
			// index by explicit mapping
			for _, fieldMapping := range subDocMapping.Fields {
				fieldMapping.processString(propertyValueString, pathString, path, indexes, context)
			}
		} else if closestDocMapping.Dynamic {
			// automatic indexing behavior

			// first see if it can be parsed by the default date parser
			dateTimeParser := context.im.DateTimeParserNamed(context.im.DefaultDateTimeParser)
			if dateTimeParser != nil {
				parsedDateTime, err := dateTimeParser.ParseDateTime(propertyValueString)
				if err != nil {
					// index as text
					fieldMapping := newTextFieldMappingDynamic(context.im)
					fieldMapping.processString(propertyValueString, pathString, path, indexes, context)
				} else {
					// index as datetime
					fieldMapping := newDateTimeFieldMappingDynamic(context.im)
					fieldMapping.processTime(parsedDateTime, pathString, path, indexes, context)
				}
			}
		}
	case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
		// re-enter with the value normalized to float64
		dm.processProperty(float64(propertyValue.Int()), path, indexes, context)
		return
	case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64:
		dm.processProperty(float64(propertyValue.Uint()), path, indexes, context)
		return
	case reflect.Float64, reflect.Float32:
		propertyValFloat := propertyValue.Float()
		if subDocMapping != nil {
			// index by explicit mapping
			for _, fieldMapping := range subDocMapping.Fields {
				fieldMapping.processFloat64(propertyValFloat, pathString, path, indexes, context)
			}
		} else if closestDocMapping.Dynamic {
			// automatic indexing behavior
			fieldMapping := newNumericFieldMappingDynamic(context.im)
			fieldMapping.processFloat64(propertyValFloat, pathString, path, indexes, context)
		}
	case reflect.Bool:
		propertyValBool := propertyValue.Bool()
		if subDocMapping != nil {
			// index by explicit mapping
			for _, fieldMapping := range subDocMapping.Fields {
				fieldMapping.processBoolean(propertyValBool, pathString, path, indexes, context)
			}
		} else if closestDocMapping.Dynamic {
			// automatic indexing behavior
			fieldMapping := newBooleanFieldMappingDynamic(context.im)
			fieldMapping.processBoolean(propertyValBool, pathString, path, indexes, context)
		}
	case reflect.Struct:
		switch property := property.(type) {
		case time.Time:
			// don't descend into the time struct
			if subDocMapping != nil {
				// index by explicit mapping
				for _, fieldMapping := range subDocMapping.Fields {
					fieldMapping.processTime(property, pathString, path, indexes, context)
				}
			} else if closestDocMapping.Dynamic {
				fieldMapping := newDateTimeFieldMappingDynamic(context.im)
				fieldMapping.processTime(property, pathString, path, indexes, context)
			}
		default:
			// any other struct is walked recursively
			dm.walkDocument(property, path, indexes, context)
		}
	default:
		dm.walkDocument(property, path, indexes, context)
	}
}
@ -0,0 +1,296 @@ |
||||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package mapping |
||||
|
||||
import ( |
||||
"encoding/json" |
||||
"fmt" |
||||
"time" |
||||
|
||||
"github.com/blevesearch/bleve/analysis" |
||||
"github.com/blevesearch/bleve/document" |
||||
) |
||||
|
||||
// control the default behavior for dynamic fields (those not explicitly mapped)
var (
	// IndexDynamic is the default Index setting for dynamically mapped fields.
	IndexDynamic = true
	// StoreDynamic is the default Store setting for dynamically mapped fields.
	StoreDynamic = true
)
||||
|
||||
// A FieldMapping describes how a specific item
// should be put into the index.
type FieldMapping struct {
	// Name overrides the document path as the indexed field name when non-empty.
	Name string `json:"name,omitempty"`
	// Type selects the indexing behavior: "text", "number", "datetime", or "boolean".
	Type string `json:"type,omitempty"`

	// Analyzer specifies the name of the analyzer to use for this field. If
	// Analyzer is empty, traverse the DocumentMapping tree toward the root and
	// pick the first non-empty DefaultAnalyzer found. If there is none, use
	// the IndexMapping.DefaultAnalyzer.
	Analyzer string `json:"analyzer,omitempty"`

	// Store indicates whether to store field values in the index. Stored
	// values can be retrieved from search results using SearchRequest.Fields.
	Store bool `json:"store,omitempty"`
	// Index indicates whether the field value should be indexed (searchable).
	Index bool `json:"index,omitempty"`

	// IncludeTermVectors, if true, makes terms occurrences to be recorded for
	// this field. It includes the term position within the terms sequence and
	// the term offsets in the source document field. Term vectors are required
	// to perform phrase queries or terms highlighting in source documents.
	IncludeTermVectors bool `json:"include_term_vectors,omitempty"`
	// IncludeInAll, if false, excludes this field from the composite "all" field.
	IncludeInAll bool `json:"include_in_all,omitempty"`
	// DateFormat names the date-time parser used for "datetime" fields.
	DateFormat string `json:"date_format,omitempty"`
}
||||
|
||||
// NewTextFieldMapping returns a default field mapping for text
|
||||
func NewTextFieldMapping() *FieldMapping { |
||||
return &FieldMapping{ |
||||
Type: "text", |
||||
Store: true, |
||||
Index: true, |
||||
IncludeTermVectors: true, |
||||
IncludeInAll: true, |
||||
} |
||||
} |
||||
|
||||
func newTextFieldMappingDynamic(im *IndexMappingImpl) *FieldMapping { |
||||
rv := NewTextFieldMapping() |
||||
rv.Store = im.StoreDynamic |
||||
rv.Index = im.IndexDynamic |
||||
return rv |
||||
} |
||||
|
||||
// NewNumericFieldMapping returns a default field mapping for numbers
|
||||
func NewNumericFieldMapping() *FieldMapping { |
||||
return &FieldMapping{ |
||||
Type: "number", |
||||
Store: true, |
||||
Index: true, |
||||
IncludeInAll: true, |
||||
} |
||||
} |
||||
|
||||
func newNumericFieldMappingDynamic(im *IndexMappingImpl) *FieldMapping { |
||||
rv := NewNumericFieldMapping() |
||||
rv.Store = im.StoreDynamic |
||||
rv.Index = im.IndexDynamic |
||||
return rv |
||||
} |
||||
|
||||
// NewDateTimeFieldMapping returns a default field mapping for dates
|
||||
func NewDateTimeFieldMapping() *FieldMapping { |
||||
return &FieldMapping{ |
||||
Type: "datetime", |
||||
Store: true, |
||||
Index: true, |
||||
IncludeInAll: true, |
||||
} |
||||
} |
||||
|
||||
func newDateTimeFieldMappingDynamic(im *IndexMappingImpl) *FieldMapping { |
||||
rv := NewDateTimeFieldMapping() |
||||
rv.Store = im.StoreDynamic |
||||
rv.Index = im.IndexDynamic |
||||
return rv |
||||
} |
||||
|
||||
// NewBooleanFieldMapping returns a default field mapping for booleans
|
||||
func NewBooleanFieldMapping() *FieldMapping { |
||||
return &FieldMapping{ |
||||
Type: "boolean", |
||||
Store: true, |
||||
Index: true, |
||||
IncludeInAll: true, |
||||
} |
||||
} |
||||
|
||||
func newBooleanFieldMappingDynamic(im *IndexMappingImpl) *FieldMapping { |
||||
rv := NewBooleanFieldMapping() |
||||
rv.Store = im.StoreDynamic |
||||
rv.Index = im.IndexDynamic |
||||
return rv |
||||
} |
||||
|
||||
// Options returns the indexing options for this field.
|
||||
func (fm *FieldMapping) Options() document.IndexingOptions { |
||||
var rv document.IndexingOptions |
||||
if fm.Store { |
||||
rv |= document.StoreField |
||||
} |
||||
if fm.Index { |
||||
rv |= document.IndexField |
||||
} |
||||
if fm.IncludeTermVectors { |
||||
rv |= document.IncludeTermVectors |
||||
} |
||||
return rv |
||||
} |
||||
|
||||
// processString indexes a string value according to this mapping's
// type. For "text" fields the value is analyzed and added to the
// document; for "datetime" fields the string is parsed first (using
// DateFormat if set, else the index default) and re-dispatched as a
// time value. Strings that fail to parse as dates are silently
// dropped — deliberate best-effort behavior, not an error. Mappings
// of any other type ignore the value.
func (fm *FieldMapping) processString(propertyValueString string, pathString string, path []string, indexes []uint64, context *walkContext) {
	fieldName := getFieldName(pathString, path, fm)
	options := fm.Options()
	if fm.Type == "text" {
		analyzer := fm.analyzerForField(path, context)
		field := document.NewTextFieldCustom(fieldName, indexes, []byte(propertyValueString), options, analyzer)
		context.doc.AddField(field)

		// track fields excluded from the composite all field
		if !fm.IncludeInAll {
			context.excludedFromAll = append(context.excludedFromAll, fieldName)
		}
	} else if fm.Type == "datetime" {
		dateTimeFormat := context.im.DefaultDateTimeParser
		if fm.DateFormat != "" {
			dateTimeFormat = fm.DateFormat
		}
		dateTimeParser := context.im.DateTimeParserNamed(dateTimeFormat)
		if dateTimeParser != nil {
			parsedDateTime, err := dateTimeParser.ParseDateTime(propertyValueString)
			if err == nil {
				fm.processTime(parsedDateTime, pathString, path, indexes, context)
			}
		}
	}
}
||||
|
||||
func (fm *FieldMapping) processFloat64(propertyValFloat float64, pathString string, path []string, indexes []uint64, context *walkContext) { |
||||
fieldName := getFieldName(pathString, path, fm) |
||||
if fm.Type == "number" { |
||||
options := fm.Options() |
||||
field := document.NewNumericFieldWithIndexingOptions(fieldName, indexes, propertyValFloat, options) |
||||
context.doc.AddField(field) |
||||
|
||||
if !fm.IncludeInAll { |
||||
context.excludedFromAll = append(context.excludedFromAll, fieldName) |
||||
} |
||||
} |
||||
} |
||||
|
||||
func (fm *FieldMapping) processTime(propertyValueTime time.Time, pathString string, path []string, indexes []uint64, context *walkContext) { |
||||
fieldName := getFieldName(pathString, path, fm) |
||||
if fm.Type == "datetime" { |
||||
options := fm.Options() |
||||
field, err := document.NewDateTimeFieldWithIndexingOptions(fieldName, indexes, propertyValueTime, options) |
||||
if err == nil { |
||||
context.doc.AddField(field) |
||||
} else { |
||||
logger.Printf("could not build date %v", err) |
||||
} |
||||
|
||||
if !fm.IncludeInAll { |
||||
context.excludedFromAll = append(context.excludedFromAll, fieldName) |
||||
} |
||||
} |
||||
} |
||||
|
||||
func (fm *FieldMapping) processBoolean(propertyValueBool bool, pathString string, path []string, indexes []uint64, context *walkContext) { |
||||
fieldName := getFieldName(pathString, path, fm) |
||||
if fm.Type == "boolean" { |
||||
options := fm.Options() |
||||
field := document.NewBooleanFieldWithIndexingOptions(fieldName, indexes, propertyValueBool, options) |
||||
context.doc.AddField(field) |
||||
|
||||
if !fm.IncludeInAll { |
||||
context.excludedFromAll = append(context.excludedFromAll, fieldName) |
||||
} |
||||
} |
||||
} |
||||
|
||||
func (fm *FieldMapping) analyzerForField(path []string, context *walkContext) *analysis.Analyzer { |
||||
analyzerName := fm.Analyzer |
||||
if analyzerName == "" { |
||||
analyzerName = context.dm.defaultAnalyzerName(path) |
||||
if analyzerName == "" { |
||||
analyzerName = context.im.DefaultAnalyzer |
||||
} |
||||
} |
||||
return context.im.AnalyzerNamed(analyzerName) |
||||
} |
||||
|
||||
func getFieldName(pathString string, path []string, fieldMapping *FieldMapping) string { |
||||
fieldName := pathString |
||||
if fieldMapping.Name != "" { |
||||
parentName := "" |
||||
if len(path) > 1 { |
||||
parentName = encodePath(path[:len(path)-1]) + pathSeparator |
||||
} |
||||
fieldName = parentName + fieldMapping.Name |
||||
} |
||||
return fieldName |
||||
} |
||||
|
||||
// UnmarshalJSON offers custom unmarshaling with optional strict
// validation: when MappingJSONStrict is true, unknown keys cause an
// error instead of being silently ignored.
func (fm *FieldMapping) UnmarshalJSON(data []byte) error {

	// decode into raw messages first so each key can be handled
	// individually and unknown keys can be collected
	var tmp map[string]json.RawMessage
	err := json.Unmarshal(data, &tmp)
	if err != nil {
		return err
	}

	var invalidKeys []string
	for k, v := range tmp {
		switch k {
		case "name":
			err := json.Unmarshal(v, &fm.Name)
			if err != nil {
				return err
			}
		case "type":
			err := json.Unmarshal(v, &fm.Type)
			if err != nil {
				return err
			}
		case "analyzer":
			err := json.Unmarshal(v, &fm.Analyzer)
			if err != nil {
				return err
			}
		case "store":
			err := json.Unmarshal(v, &fm.Store)
			if err != nil {
				return err
			}
		case "index":
			err := json.Unmarshal(v, &fm.Index)
			if err != nil {
				return err
			}
		case "include_term_vectors":
			err := json.Unmarshal(v, &fm.IncludeTermVectors)
			if err != nil {
				return err
			}
		case "include_in_all":
			err := json.Unmarshal(v, &fm.IncludeInAll)
			if err != nil {
				return err
			}
		case "date_format":
			err := json.Unmarshal(v, &fm.DateFormat)
			if err != nil {
				return err
			}
		default:
			invalidKeys = append(invalidKeys, k)
		}
	}

	if MappingJSONStrict && len(invalidKeys) > 0 {
		return fmt.Errorf("field mapping contains invalid keys: %v", invalidKeys)
	}

	return nil
}
@ -0,0 +1,430 @@ |
||||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package mapping |
||||
|
||||
import ( |
||||
"encoding/json" |
||||
"fmt" |
||||
|
||||
"github.com/blevesearch/bleve/analysis" |
||||
"github.com/blevesearch/bleve/analysis/analyzer/standard" |
||||
"github.com/blevesearch/bleve/analysis/datetime/optional" |
||||
"github.com/blevesearch/bleve/document" |
||||
"github.com/blevesearch/bleve/registry" |
||||
) |
||||
|
||||
// MappingJSONStrict, when true, makes the custom UnmarshalJSON methods of
// the mapping types reject unknown JSON keys instead of ignoring them.
var MappingJSONStrict = false

// Defaults applied when an index mapping omits the corresponding setting.
const defaultTypeField = "_type"
const defaultType = "_default"
const defaultField = "_all"
const defaultAnalyzer = standard.Name
const defaultDateTimeParser = optional.Name
||||
|
||||
// An IndexMappingImpl controls how objects are placed
// into an index.
// First the type of the object is determined.
// Once the type is known, the appropriate
// DocumentMapping is selected by the type.
// If no mapping was determined for that type,
// a DefaultMapping will be used.
type IndexMappingImpl struct {
	TypeMapping           map[string]*DocumentMapping `json:"types,omitempty"`  // per-document-type mappings
	DefaultMapping        *DocumentMapping            `json:"default_mapping"`  // used when no type mapping matches
	TypeField             string                      `json:"type_field"`       // property consulted to classify a document
	DefaultType           string                      `json:"default_type"`     // type assigned when none can be determined
	DefaultAnalyzer       string                      `json:"default_analyzer"` // analyzer used when no mapping specifies one
	DefaultDateTimeParser string                      `json:"default_datetime_parser"`
	DefaultField          string                      `json:"default_field"` // field searched when none is specified
	StoreDynamic          bool                        `json:"store_dynamic"`
	IndexDynamic          bool                        `json:"index_dynamic"`
	CustomAnalysis        *customAnalysis             `json:"analysis,omitempty"` // user-defined analysis components
	cache                 *registry.Cache             // resolves analyzer/parser names to implementations
}
||||
|
||||
// AddCustomCharFilter defines a custom char filter for use in this mapping
|
||||
func (im *IndexMappingImpl) AddCustomCharFilter(name string, config map[string]interface{}) error { |
||||
_, err := im.cache.DefineCharFilter(name, config) |
||||
if err != nil { |
||||
return err |
||||
} |
||||
im.CustomAnalysis.CharFilters[name] = config |
||||
return nil |
||||
} |
||||
|
||||
// AddCustomTokenizer defines a custom tokenizer for use in this mapping
|
||||
func (im *IndexMappingImpl) AddCustomTokenizer(name string, config map[string]interface{}) error { |
||||
_, err := im.cache.DefineTokenizer(name, config) |
||||
if err != nil { |
||||
return err |
||||
} |
||||
im.CustomAnalysis.Tokenizers[name] = config |
||||
return nil |
||||
} |
||||
|
||||
// AddCustomTokenMap defines a custom token map for use in this mapping
|
||||
func (im *IndexMappingImpl) AddCustomTokenMap(name string, config map[string]interface{}) error { |
||||
_, err := im.cache.DefineTokenMap(name, config) |
||||
if err != nil { |
||||
return err |
||||
} |
||||
im.CustomAnalysis.TokenMaps[name] = config |
||||
return nil |
||||
} |
||||
|
||||
// AddCustomTokenFilter defines a custom token filter for use in this mapping
|
||||
func (im *IndexMappingImpl) AddCustomTokenFilter(name string, config map[string]interface{}) error { |
||||
_, err := im.cache.DefineTokenFilter(name, config) |
||||
if err != nil { |
||||
return err |
||||
} |
||||
im.CustomAnalysis.TokenFilters[name] = config |
||||
return nil |
||||
} |
||||
|
||||
// AddCustomAnalyzer defines a custom analyzer for use in this mapping. The
|
||||
// config map must have a "type" string entry to resolve the analyzer
|
||||
// constructor. The constructor is invoked with the remaining entries and
|
||||
// returned analyzer is registered in the IndexMapping.
|
||||
//
|
||||
// bleve comes with predefined analyzers, like
|
||||
// github.com/blevesearch/bleve/analysis/analyzers/custom_analyzer. They are
|
||||
// available only if their package is imported by client code. To achieve this,
|
||||
// use their metadata to fill configuration entries:
|
||||
//
|
||||
// import (
|
||||
// "github.com/blevesearch/bleve/analysis/analyzers/custom_analyzer"
|
||||
// "github.com/blevesearch/bleve/analysis/char_filters/html_char_filter"
|
||||
// "github.com/blevesearch/bleve/analysis/token_filters/lower_case_filter"
|
||||
// "github.com/blevesearch/bleve/analysis/tokenizers/unicode"
|
||||
// )
|
||||
//
|
||||
// m := bleve.NewIndexMapping()
|
||||
// err := m.AddCustomAnalyzer("html", map[string]interface{}{
|
||||
// "type": custom_analyzer.Name,
|
||||
// "char_filters": []string{
|
||||
// html_char_filter.Name,
|
||||
// },
|
||||
// "tokenizer": unicode.Name,
|
||||
// "token_filters": []string{
|
||||
// lower_case_filter.Name,
|
||||
// ...
|
||||
// },
|
||||
// })
|
||||
func (im *IndexMappingImpl) AddCustomAnalyzer(name string, config map[string]interface{}) error { |
||||
_, err := im.cache.DefineAnalyzer(name, config) |
||||
if err != nil { |
||||
return err |
||||
} |
||||
im.CustomAnalysis.Analyzers[name] = config |
||||
return nil |
||||
} |
||||
|
||||
// AddCustomDateTimeParser defines a custom date time parser for use in this mapping
|
||||
func (im *IndexMappingImpl) AddCustomDateTimeParser(name string, config map[string]interface{}) error { |
||||
_, err := im.cache.DefineDateTimeParser(name, config) |
||||
if err != nil { |
||||
return err |
||||
} |
||||
im.CustomAnalysis.DateTimeParsers[name] = config |
||||
return nil |
||||
} |
||||
|
||||
// NewIndexMapping creates a new IndexMapping that will use all the default indexing rules
|
||||
func NewIndexMapping() *IndexMappingImpl { |
||||
return &IndexMappingImpl{ |
||||
TypeMapping: make(map[string]*DocumentMapping), |
||||
DefaultMapping: NewDocumentMapping(), |
||||
TypeField: defaultTypeField, |
||||
DefaultType: defaultType, |
||||
DefaultAnalyzer: defaultAnalyzer, |
||||
DefaultDateTimeParser: defaultDateTimeParser, |
||||
DefaultField: defaultField, |
||||
IndexDynamic: IndexDynamic, |
||||
StoreDynamic: StoreDynamic, |
||||
CustomAnalysis: newCustomAnalysis(), |
||||
cache: registry.NewCache(), |
||||
} |
||||
} |
||||
|
||||
// Validate will walk the entire structure ensuring the following
|
||||
// explicitly named and default analyzers can be built
|
||||
func (im *IndexMappingImpl) Validate() error { |
||||
_, err := im.cache.AnalyzerNamed(im.DefaultAnalyzer) |
||||
if err != nil { |
||||
return err |
||||
} |
||||
_, err = im.cache.DateTimeParserNamed(im.DefaultDateTimeParser) |
||||
if err != nil { |
||||
return err |
||||
} |
||||
err = im.DefaultMapping.Validate(im.cache) |
||||
if err != nil { |
||||
return err |
||||
} |
||||
for _, docMapping := range im.TypeMapping { |
||||
err = docMapping.Validate(im.cache) |
||||
if err != nil { |
||||
return err |
||||
} |
||||
} |
||||
return nil |
||||
} |
||||
|
||||
// AddDocumentMapping registers dm as the document mapping used for
// documents classified as doctype, replacing any previous mapping.
func (im *IndexMappingImpl) AddDocumentMapping(doctype string, dm *DocumentMapping) {
	im.TypeMapping[doctype] = dm
}
||||
|
||||
func (im *IndexMappingImpl) mappingForType(docType string) *DocumentMapping { |
||||
docMapping := im.TypeMapping[docType] |
||||
if docMapping == nil { |
||||
docMapping = im.DefaultMapping |
||||
} |
||||
return docMapping |
||||
} |
||||
|
||||
// UnmarshalJSON offers custom unmarshaling with optional strict validation
|
||||
func (im *IndexMappingImpl) UnmarshalJSON(data []byte) error { |
||||
|
||||
var tmp map[string]json.RawMessage |
||||
err := json.Unmarshal(data, &tmp) |
||||
if err != nil { |
||||
return err |
||||
} |
||||
|
||||
// set defaults for fields which might have been omitted
|
||||
im.cache = registry.NewCache() |
||||
im.CustomAnalysis = newCustomAnalysis() |
||||
im.TypeField = defaultTypeField |
||||
im.DefaultType = defaultType |
||||
im.DefaultAnalyzer = defaultAnalyzer |
||||
im.DefaultDateTimeParser = defaultDateTimeParser |
||||
im.DefaultField = defaultField |
||||
im.DefaultMapping = NewDocumentMapping() |
||||
im.TypeMapping = make(map[string]*DocumentMapping) |
||||
im.StoreDynamic = StoreDynamic |
||||
im.IndexDynamic = IndexDynamic |
||||
|
||||
var invalidKeys []string |
||||
for k, v := range tmp { |
||||
switch k { |
||||
case "analysis": |
||||
err := json.Unmarshal(v, &im.CustomAnalysis) |
||||
if err != nil { |
||||
return err |
||||
} |
||||
case "type_field": |
||||
err := json.Unmarshal(v, &im.TypeField) |
||||
if err != nil { |
||||
return err |
||||
} |
||||
case "default_type": |
||||
err := json.Unmarshal(v, &im.DefaultType) |
||||
if err != nil { |
||||
return err |
||||
} |
||||
case "default_analyzer": |
||||
err := json.Unmarshal(v, &im.DefaultAnalyzer) |
||||
if err != nil { |
||||
return err |
||||
} |
||||
case "default_datetime_parser": |
||||
err := json.Unmarshal(v, &im.DefaultDateTimeParser) |
||||
if err != nil { |
||||
return err |
||||
} |
||||
case "default_field": |
||||
err := json.Unmarshal(v, &im.DefaultField) |
||||
if err != nil { |
||||
return err |
||||
} |
||||
case "default_mapping": |
||||
err := json.Unmarshal(v, &im.DefaultMapping) |
||||
if err != nil { |
||||
return err |
||||
} |
||||
case "types": |
||||
err := json.Unmarshal(v, &im.TypeMapping) |
||||
if err != nil { |
||||
return err |
||||
} |
||||
case "store_dynamic": |
||||
err := json.Unmarshal(v, &im.StoreDynamic) |
||||
if err != nil { |
||||
return err |
||||
} |
||||
case "index_dynamic": |
||||
err := json.Unmarshal(v, &im.IndexDynamic) |
||||
if err != nil { |
||||
return err |
||||
} |
||||
default: |
||||
invalidKeys = append(invalidKeys, k) |
||||
} |
||||
} |
||||
|
||||
if MappingJSONStrict && len(invalidKeys) > 0 { |
||||
return fmt.Errorf("index mapping contains invalid keys: %v", invalidKeys) |
||||
} |
||||
|
||||
err = im.CustomAnalysis.registerAll(im) |
||||
if err != nil { |
||||
return err |
||||
} |
||||
|
||||
return nil |
||||
} |
||||
|
||||
func (im *IndexMappingImpl) determineType(data interface{}) string { |
||||
// first see if the object implements Classifier
|
||||
classifier, ok := data.(Classifier) |
||||
if ok { |
||||
return classifier.Type() |
||||
} |
||||
|
||||
// now see if we can find a type using the mapping
|
||||
typ, ok := mustString(lookupPropertyPath(data, im.TypeField)) |
||||
if ok { |
||||
return typ |
||||
} |
||||
|
||||
return im.DefaultType |
||||
} |
||||
|
||||
func (im *IndexMappingImpl) MapDocument(doc *document.Document, data interface{}) error { |
||||
docType := im.determineType(data) |
||||
docMapping := im.mappingForType(docType) |
||||
walkContext := im.newWalkContext(doc, docMapping) |
||||
if docMapping.Enabled { |
||||
docMapping.walkDocument(data, []string{}, []uint64{}, walkContext) |
||||
|
||||
// see if the _all field was disabled
|
||||
allMapping := docMapping.documentMappingForPath("_all") |
||||
if allMapping == nil || (allMapping.Enabled != false) { |
||||
field := document.NewCompositeFieldWithIndexingOptions("_all", true, []string{}, walkContext.excludedFromAll, document.IndexField|document.IncludeTermVectors) |
||||
doc.AddField(field) |
||||
} |
||||
} |
||||
|
||||
return nil |
||||
} |
||||
|
||||
// walkContext carries state while walking a document's properties during
// mapping.
type walkContext struct {
	doc             *document.Document // document being populated
	im              *IndexMappingImpl  // owning index mapping
	dm              *DocumentMapping   // mapping selected for this document's type
	excludedFromAll []string           // field names excluded from the "_all" composite field
}
||||
|
||||
func (im *IndexMappingImpl) newWalkContext(doc *document.Document, dm *DocumentMapping) *walkContext { |
||||
return &walkContext{ |
||||
doc: doc, |
||||
im: im, |
||||
dm: dm, |
||||
excludedFromAll: []string{}, |
||||
} |
||||
} |
||||
|
||||
// AnalyzerNameForPath attempts to find the best analyzer to use with only a
|
||||
// field name will walk all the document types, look for field mappings at the
|
||||
// provided path, if one exists and it has an explicit analyzer that is
|
||||
// returned.
|
||||
func (im *IndexMappingImpl) AnalyzerNameForPath(path string) string { |
||||
// first we look for explicit mapping on the field
|
||||
for _, docMapping := range im.TypeMapping { |
||||
analyzerName := docMapping.analyzerNameForPath(path) |
||||
if analyzerName != "" { |
||||
return analyzerName |
||||
} |
||||
} |
||||
// now try the default mapping
|
||||
pathMapping := im.DefaultMapping.documentMappingForPath(path) |
||||
if pathMapping != nil { |
||||
if len(pathMapping.Fields) > 0 { |
||||
if pathMapping.Fields[0].Analyzer != "" { |
||||
return pathMapping.Fields[0].Analyzer |
||||
} |
||||
} |
||||
} |
||||
|
||||
// next we will try default analyzers for the path
|
||||
pathDecoded := decodePath(path) |
||||
for _, docMapping := range im.TypeMapping { |
||||
rv := docMapping.defaultAnalyzerName(pathDecoded) |
||||
if rv != "" { |
||||
return rv |
||||
} |
||||
} |
||||
|
||||
return im.DefaultAnalyzer |
||||
} |
||||
|
||||
func (im *IndexMappingImpl) AnalyzerNamed(name string) *analysis.Analyzer { |
||||
analyzer, err := im.cache.AnalyzerNamed(name) |
||||
if err != nil { |
||||
logger.Printf("error using analyzer named: %s", name) |
||||
return nil |
||||
} |
||||
return analyzer |
||||
} |
||||
|
||||
func (im *IndexMappingImpl) DateTimeParserNamed(name string) analysis.DateTimeParser { |
||||
if name == "" { |
||||
name = im.DefaultDateTimeParser |
||||
} |
||||
dateTimeParser, err := im.cache.DateTimeParserNamed(name) |
||||
if err != nil { |
||||
logger.Printf("error using datetime parser named: %s", name) |
||||
return nil |
||||
} |
||||
return dateTimeParser |
||||
} |
||||
|
||||
func (im *IndexMappingImpl) datetimeParserNameForPath(path string) string { |
||||
|
||||
// first we look for explicit mapping on the field
|
||||
for _, docMapping := range im.TypeMapping { |
||||
pathMapping := docMapping.documentMappingForPath(path) |
||||
if pathMapping != nil { |
||||
if len(pathMapping.Fields) > 0 { |
||||
if pathMapping.Fields[0].Analyzer != "" { |
||||
return pathMapping.Fields[0].Analyzer |
||||
} |
||||
} |
||||
} |
||||
} |
||||
|
||||
return im.DefaultDateTimeParser |
||||
} |
||||
|
||||
func (im *IndexMappingImpl) AnalyzeText(analyzerName string, text []byte) (analysis.TokenStream, error) { |
||||
analyzer, err := im.cache.AnalyzerNamed(analyzerName) |
||||
if err != nil { |
||||
return nil, err |
||||
} |
||||
return analyzer.Analyze(text), nil |
||||
} |
||||
|
||||
// FieldAnalyzer returns the name of the analyzer used on a field; it is a
// thin wrapper around AnalyzerNameForPath.
func (im *IndexMappingImpl) FieldAnalyzer(field string) string {
	return im.AnalyzerNameForPath(field)
}
||||
|
||||
// DefaultSearchField returns the name of the field searched when no field
// is specified (wrapper to satisfy the IndexMapping interface).
func (im *IndexMappingImpl) DefaultSearchField() string {
	return im.DefaultField
}
@ -0,0 +1,49 @@ |
||||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package mapping |
||||
|
||||
import ( |
||||
"io/ioutil" |
||||
"log" |
||||
|
||||
"github.com/blevesearch/bleve/analysis" |
||||
"github.com/blevesearch/bleve/document" |
||||
) |
||||
|
||||
// A Classifier is an interface describing any object
// which knows how to identify its own type.
type Classifier interface {
	// Type returns the document type name for this object.
	Type() string
}
||||
|
||||
// logger receives diagnostic messages from this package; it discards
// everything by default (see SetLog).
var logger = log.New(ioutil.Discard, "bleve mapping ", log.LstdFlags)
||||
|
||||
// SetLog redirects this package's log output to l.
// By default log messages are sent to ioutil.Discard.
func SetLog(l *log.Logger) {
	logger = l
}
||||
|
||||
// IndexMapping describes how documents are mapped into an index and how
// field names resolve to analysis components.
type IndexMapping interface {
	// MapDocument populates doc with fields derived from data.
	MapDocument(doc *document.Document, data interface{}) error
	// Validate checks that every referenced analyzer/parser can be built.
	Validate() error

	// DateTimeParserNamed returns the named date time parser, or nil.
	DateTimeParserNamed(name string) analysis.DateTimeParser

	// DefaultSearchField returns the field searched when none is specified.
	DefaultSearchField() string

	// AnalyzerNameForPath returns the analyzer name for a field path.
	AnalyzerNameForPath(path string) string
	// AnalyzerNamed returns the named analyzer, or nil.
	AnalyzerNamed(name string) *analysis.Analyzer
}
@ -0,0 +1,89 @@ |
||||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package mapping |
||||
|
||||
import ( |
||||
"reflect" |
||||
"strings" |
||||
) |
||||
|
||||
func lookupPropertyPath(data interface{}, path string) interface{} { |
||||
pathParts := decodePath(path) |
||||
|
||||
current := data |
||||
for _, part := range pathParts { |
||||
current = lookupPropertyPathPart(current, part) |
||||
if current == nil { |
||||
break |
||||
} |
||||
} |
||||
|
||||
return current |
||||
} |
||||
|
||||
// lookupPropertyPathPart resolves a single path component against data.
// String-keyed maps, structs, and pointers to either are supported; any
// other kind, a missing key/field, or nil input yields nil.
func lookupPropertyPathPart(data interface{}, part string) interface{} {
	val := reflect.ValueOf(data)
	// Guard against nil data: reflect.ValueOf(nil) is the zero Value and
	// calling Type() on it would panic.
	if !val.IsValid() {
		return nil
	}
	typ := val.Type()
	switch typ.Kind() {
	case reflect.Map:
		// FIXME can add support for other map keys in the future
		if typ.Key().Kind() == reflect.String {
			entry := val.MapIndex(reflect.ValueOf(part))
			if entry.IsValid() {
				return entry.Interface()
			}
		}
	case reflect.Struct:
		field := val.FieldByName(part)
		if field.IsValid() && field.CanInterface() {
			return field.Interface()
		}
	case reflect.Ptr:
		ptrElem := val.Elem()
		if ptrElem.IsValid() && ptrElem.CanInterface() {
			return lookupPropertyPathPart(ptrElem.Interface(), part)
		}
	}
	return nil
}
||||
|
||||
// pathSeparator joins and splits the components of an encoded property path.
const pathSeparator = "."

// decodePath splits an encoded property path into its components.
func decodePath(path string) []string {
	return strings.Split(path, pathSeparator)
}

// encodePath joins path components into a single encoded path string.
func encodePath(pathElements []string) string {
	return strings.Join(pathElements, pathSeparator)
}
||||
|
||||
// mustString reports whether data holds a string, returning it when so and
// ("", false) otherwise (including for nil input).
func mustString(data interface{}) (string, bool) {
	str, ok := data.(string)
	return str, ok
}
||||
|
||||
// parseTagName extracts the field name from a struct tag, dropping any
// comma-separated options that follow it.
func parseTagName(tag string) string {
	if comma := strings.IndexByte(tag, ','); comma >= 0 {
		return tag[:comma]
	}
	return tag
}
@ -0,0 +1,34 @@ |
||||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package numeric |
||||
|
||||
import ( |
||||
"math" |
||||
) |
||||
|
||||
// Float64ToInt64 maps a float64 onto an int64 such that the numeric
// ordering of the floats is preserved by the integer ordering.
func Float64ToInt64(f float64) int64 {
	i := int64(math.Float64bits(f))
	if i < 0 {
		// flip the magnitude bits so more-negative floats map to
		// smaller integers
		i ^= 0x7fffffffffffffff
	}
	return i
}
||||
|
||||
// Int64ToFloat64 is the inverse of Float64ToInt64, recovering the original
// float64 from its order-preserving integer encoding.
func Int64ToFloat64(i int64) float64 {
	bits := i
	if bits < 0 {
		bits ^= 0x7fffffffffffffff
	}
	return math.Float64frombits(uint64(bits))
}
@ -0,0 +1,92 @@ |
||||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package numeric |
||||
|
||||
import "fmt" |
||||
|
||||
// ShiftStartInt64 is the byte encoding shift 0; the shift amount (0-63) is
// added to it to form the first byte of a prefix coded value.
const ShiftStartInt64 byte = 0x20

// PrefixCoded is a byte array encoding of
// 64-bit numeric values shifted by 0-63 bits
type PrefixCoded []byte

// NewPrefixCodedInt64 encodes in, right-shifted by shift bits, as a prefix
// coded byte slice. shift must be in [0, 63].
func NewPrefixCodedInt64(in int64, shift uint) (PrefixCoded, error) {
	if shift > 63 {
		return nil, fmt.Errorf("cannot shift %d, must be between 0 and 63", shift)
	}

	nChars := ((63 - shift) / 7) + 1
	rv := make(PrefixCoded, nChars+1)
	rv[0] = ShiftStartInt64 + byte(shift)

	// flip the sign bit so encoded bytes sort in numeric order
	sortableBits := int64(uint64(in) ^ 0x8000000000000000)
	sortableBits = int64(uint64(sortableBits) >> shift)
	for nChars > 0 {
		// Store 7 bits per byte for compatibility
		// with UTF-8 encoding of terms
		rv[nChars] = byte(sortableBits & 0x7f)
		nChars--
		sortableBits = int64(uint64(sortableBits) >> 7)
	}
	return rv, nil
}

// MustNewPrefixCodedInt64 is like NewPrefixCodedInt64 but panics on error.
func MustNewPrefixCodedInt64(in int64, shift uint) PrefixCoded {
	rv, err := NewPrefixCodedInt64(in, shift)
	if err != nil {
		panic(err)
	}
	return rv
}

// Shift returns the number of bits the encoded value was shifted by, or an
// error when p is empty or its header byte is out of range.
func (p PrefixCoded) Shift() (uint, error) {
	if len(p) > 0 {
		shift := p[0] - ShiftStartInt64
		// BUG FIX: the previous condition (shift < 0 || shift < 63) was
		// wrong twice over: shift is a byte so it can never be negative,
		// and the valid shift of exactly 63 was rejected. Accept 0-63.
		if shift <= 63 {
			return uint(shift), nil
		}
	}
	return 0, fmt.Errorf("invalid prefix coded value")
}

// Int64 decodes the (shifted) value encoded in p.
func (p PrefixCoded) Int64() (int64, error) {
	shift, err := p.Shift()
	if err != nil {
		return 0, err
	}
	var sortableBits int64
	for _, inbyte := range p[1:] {
		sortableBits <<= 7
		sortableBits |= int64(inbyte)
	}
	return int64(uint64(sortableBits<<shift) ^ 0x8000000000000000), nil
}

// ValidPrefixCodedTerm reports whether p is a structurally valid prefix
// coded term and, when it is, the shift it encodes.
func ValidPrefixCodedTerm(p string) (bool, int) {
	if len(p) > 0 {
		if p[0] < ShiftStartInt64 || p[0] > ShiftStartInt64+63 {
			return false, 0
		}
		shift := p[0] - ShiftStartInt64
		nChars := ((63 - int(shift)) / 7) + 1
		if len(p) != nChars+1 {
			return false, 0
		}
		return true, int(shift)
	}
	return false, 0
}
@ -0,0 +1,186 @@ |
||||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package bleve |
||||
|
||||
import ( |
||||
"time" |
||||
|
||||
"github.com/blevesearch/bleve/search/query" |
||||
) |
||||
|
||||
// NewBoolFieldQuery creates a new Query for boolean fields.
func NewBoolFieldQuery(val bool) *query.BoolFieldQuery {
	return query.NewBoolFieldQuery(val)
}

// NewBooleanQuery creates a compound Query composed
// of several other Query objects.
// These other query objects are added using the
// AddMust() AddShould() and AddMustNot() methods.
// Result documents must satisfy ALL of the
// must Queries.
// Result documents must satisfy NONE of the must not
// Queries.
// Result documents that ALSO satisfy any of the should
// Queries will score higher.
func NewBooleanQuery() *query.BooleanQuery {
	return query.NewBooleanQuery(nil, nil, nil)
}

// NewConjunctionQuery creates a new compound Query.
// Result documents must satisfy all of the queries.
func NewConjunctionQuery(conjuncts ...query.Query) *query.ConjunctionQuery {
	return query.NewConjunctionQuery(conjuncts)
}

// NewDateRangeQuery creates a new Query for ranges
// of date values.
// Date strings are parsed using the DateTimeParser configured in the
// top-level config.QueryDateTimeParser.
// Either, but not both endpoints can be nil.
func NewDateRangeQuery(start, end time.Time) *query.DateRangeQuery {
	return query.NewDateRangeQuery(start, end)
}

// NewDateRangeInclusiveQuery creates a new Query for ranges
// of date values.
// Date strings are parsed using the DateTimeParser configured in the
// top-level config.QueryDateTimeParser.
// Either, but not both endpoints can be nil.
// startInclusive and endInclusive control inclusion of the endpoints.
func NewDateRangeInclusiveQuery(start, end time.Time, startInclusive, endInclusive *bool) *query.DateRangeQuery {
	return query.NewDateRangeInclusiveQuery(start, end, startInclusive, endInclusive)
}

// NewDisjunctionQuery creates a new compound Query.
// Result documents satisfy at least one Query.
func NewDisjunctionQuery(disjuncts ...query.Query) *query.DisjunctionQuery {
	return query.NewDisjunctionQuery(disjuncts)
}

// NewDocIDQuery creates a new Query object returning indexed documents among
// the specified set. Combine it with ConjunctionQuery to restrict the scope of
// other queries output.
func NewDocIDQuery(ids []string) *query.DocIDQuery {
	return query.NewDocIDQuery(ids)
}

// NewFuzzyQuery creates a new Query which finds
// documents containing terms within a specific
// fuzziness of the specified term.
// The default fuzziness is 1.
//
// The current implementation uses Levenshtein edit
// distance as the fuzziness metric.
func NewFuzzyQuery(term string) *query.FuzzyQuery {
	return query.NewFuzzyQuery(term)
}

// NewMatchAllQuery creates a Query which will
// match all documents in the index.
func NewMatchAllQuery() *query.MatchAllQuery {
	return query.NewMatchAllQuery()
}

// NewMatchNoneQuery creates a Query which will not
// match any documents in the index.
func NewMatchNoneQuery() *query.MatchNoneQuery {
	return query.NewMatchNoneQuery()
}

// NewMatchPhraseQuery creates a new Query object
// for matching phrases in the index.
// An Analyzer is chosen based on the field.
// Input text is analyzed using this analyzer.
// Token terms resulting from this analysis are
// used to build a search phrase. Result documents
// must match this phrase. Queried field must have been indexed with
// IncludeTermVectors set to true.
func NewMatchPhraseQuery(matchPhrase string) *query.MatchPhraseQuery {
	return query.NewMatchPhraseQuery(matchPhrase)
}

// NewMatchQuery creates a Query for matching text.
// An Analyzer is chosen based on the field.
// Input text is analyzed using this analyzer.
// Token terms resulting from this analysis are
// used to perform term searches. Result documents
// must satisfy at least one of these term searches.
func NewMatchQuery(match string) *query.MatchQuery {
	return query.NewMatchQuery(match)
}

// NewNumericRangeQuery creates a new Query for ranges
// of numeric values.
// Either, but not both endpoints can be nil.
// The minimum value is inclusive.
// The maximum value is exclusive.
func NewNumericRangeQuery(min, max *float64) *query.NumericRangeQuery {
	return query.NewNumericRangeQuery(min, max)
}

// NewNumericRangeInclusiveQuery creates a new Query for ranges
// of numeric values.
// Either, but not both endpoints can be nil.
// Control endpoint inclusion with inclusiveMin, inclusiveMax.
func NewNumericRangeInclusiveQuery(min, max *float64, minInclusive, maxInclusive *bool) *query.NumericRangeQuery {
	return query.NewNumericRangeInclusiveQuery(min, max, minInclusive, maxInclusive)
}

// NewPhraseQuery creates a new Query for finding
// exact term phrases in the index.
// The provided terms must exist in the correct
// order, at the correct index offsets, in the
// specified field. Queried field must have been indexed with
// IncludeTermVectors set to true.
func NewPhraseQuery(terms []string, field string) *query.PhraseQuery {
	return query.NewPhraseQuery(terms, field)
}

// NewPrefixQuery creates a new Query which finds
// documents containing terms that start with the
// specified prefix.
func NewPrefixQuery(prefix string) *query.PrefixQuery {
	return query.NewPrefixQuery(prefix)
}

// NewRegexpQuery creates a new Query which finds
// documents containing terms that match the
// specified regular expression.
func NewRegexpQuery(regexp string) *query.RegexpQuery {
	return query.NewRegexpQuery(regexp)
}

// NewQueryStringQuery creates a new Query used for
// finding documents that satisfy a query string. The
// query string is a small query language for humans.
func NewQueryStringQuery(q string) *query.QueryStringQuery {
	return query.NewQueryStringQuery(q)
}

// NewTermQuery creates a new Query for finding an
// exact term match in the index.
func NewTermQuery(term string) *query.TermQuery {
	return query.NewTermQuery(term)
}

// NewWildcardQuery creates a new Query which finds
// documents containing terms that match the
// specified wildcard. In the wildcard pattern '*'
// will match any sequence of 0 or more characters,
// and '?' will match any single character.
func NewWildcardQuery(wildcard string) *query.WildcardQuery {
	return query.NewWildcardQuery(wildcard)
}
@ -0,0 +1,89 @@ |
||||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package registry |
||||
|
||||
import ( |
||||
"fmt" |
||||
|
||||
"github.com/blevesearch/bleve/analysis" |
||||
) |
||||
|
||||
func RegisterAnalyzer(name string, constructor AnalyzerConstructor) { |
||||
_, exists := analyzers[name] |
||||
if exists { |
||||
panic(fmt.Errorf("attempted to register duplicate analyzer named '%s'", name)) |
||||
} |
||||
analyzers[name] = constructor |
||||
} |
||||
|
||||
// AnalyzerConstructor builds an analysis.Analyzer from the given
// configuration, resolving any dependencies through cache.
type AnalyzerConstructor func(config map[string]interface{}, cache *Cache) (*analysis.Analyzer, error)

// AnalyzerRegistry maps a registered analyzer name to its constructor.
type AnalyzerRegistry map[string]AnalyzerConstructor
||||
|
||||
// AnalyzerCache caches built analyzers by name; the embedded
// ConcurrentCache provides the synchronized storage.
type AnalyzerCache struct {
	*ConcurrentCache
}
||||
|
||||
func NewAnalyzerCache() *AnalyzerCache { |
||||
return &AnalyzerCache{ |
||||
NewConcurrentCache(), |
||||
} |
||||
} |
||||
|
||||
func AnalyzerBuild(name string, config map[string]interface{}, cache *Cache) (interface{}, error) { |
||||
cons, registered := analyzers[name] |
||||
if !registered { |
||||
return nil, fmt.Errorf("no analyzer with name or type '%s' registered", name) |
||||
} |
||||
analyzer, err := cons(config, cache) |
||||
if err != nil { |
||||
return nil, fmt.Errorf("error building analyzer: %v", err) |
||||
} |
||||
return analyzer, nil |
||||
} |
||||
|
||||
func (c *AnalyzerCache) AnalyzerNamed(name string, cache *Cache) (*analysis.Analyzer, error) { |
||||
item, err := c.ItemNamed(name, cache, AnalyzerBuild) |
||||
if err != nil { |
||||
return nil, err |
||||
} |
||||
return item.(*analysis.Analyzer), nil |
||||
} |
||||
|
||||
func (c *AnalyzerCache) DefineAnalyzer(name string, typ string, config map[string]interface{}, cache *Cache) (*analysis.Analyzer, error) { |
||||
item, err := c.DefineItem(name, typ, config, cache, AnalyzerBuild) |
||||
if err != nil { |
||||
if err == ErrAlreadyDefined { |
||||
return nil, fmt.Errorf("analyzer named '%s' already defined", name) |
||||
} |
||||
return nil, err |
||||
} |
||||
return item.(*analysis.Analyzer), nil |
||||
} |
||||
|
||||
func AnalyzerTypesAndInstances() ([]string, []string) { |
||||
emptyConfig := map[string]interface{}{} |
||||
emptyCache := NewCache() |
||||
var types []string |
||||
var instances []string |
||||
for name, cons := range analyzers { |
||||
_, err := cons(emptyConfig, emptyCache) |
||||
if err == nil { |
||||
instances = append(instances, name) |
||||
} else { |
||||
types = append(types, name) |
||||
} |
||||
} |
||||
return types, instances |
||||
} |
@ -0,0 +1,87 @@ |
||||
// Copyright (c) 2016 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package registry |
||||
|
||||
import ( |
||||
"fmt" |
||||
"sync" |
||||
) |
||||
|
||||
// ErrAlreadyDefined is returned by DefineItem when an item has
// already been stored under the requested name.
var ErrAlreadyDefined = fmt.Errorf("item already defined")
||||
|
||||
// CacheBuild is the builder callback a ConcurrentCache invokes to
// construct an item that is not yet cached.
type CacheBuild func(name string, config map[string]interface{}, cache *Cache) (interface{}, error)
||||
|
||||
// ConcurrentCache is a name-to-item cache safe for concurrent use;
// access to data is guarded by mutex.
type ConcurrentCache struct {
	mutex sync.RWMutex
	data  map[string]interface{}
}
||||
|
||||
func NewConcurrentCache() *ConcurrentCache { |
||||
return &ConcurrentCache{ |
||||
data: make(map[string]interface{}), |
||||
} |
||||
} |
||||
|
||||
// ItemNamed returns the item cached under name, invoking build (with a
// nil config) to create it on first access. The lock is released while
// building, so two goroutines may build the same item concurrently; the
// write-lock re-check below ensures only the first stored result wins
// and the loser's build is discarded.
func (c *ConcurrentCache) ItemNamed(name string, cache *Cache, build CacheBuild) (interface{}, error) {
	c.mutex.RLock()
	item, cached := c.data[name]
	if cached {
		c.mutex.RUnlock()
		return item, nil
	}
	// give up read lock so other lookups can proceed while we build
	c.mutex.RUnlock()
	// try to build it; named lookups carry no configuration
	newItem, err := build(name, nil, cache)
	if err != nil {
		return nil, err
	}
	// acquire write lock to publish the result
	c.mutex.Lock()
	defer c.mutex.Unlock()
	// check again because it could have been created while trading locks
	item, cached = c.data[name]
	if cached {
		return item, nil
	}
	c.data[name] = newItem
	return newItem, nil
}
||||
|
||||
// DefineItem builds a new item of type typ from config and stores it
// under name, returning ErrAlreadyDefined if name is already present.
// Note that build receives typ (not name): the registry constructor is
// looked up by type, while the result is cached under the caller's name.
// The lock is released during the build, so presence is re-checked under
// the write lock before storing.
func (c *ConcurrentCache) DefineItem(name string, typ string, config map[string]interface{}, cache *Cache, build CacheBuild) (interface{}, error) {
	c.mutex.RLock()
	_, cached := c.data[name]
	if cached {
		c.mutex.RUnlock()
		return nil, ErrAlreadyDefined
	}
	// give up read lock so other lookups can proceed
	c.mutex.RUnlock()
	// really not there, try to build it
	newItem, err := build(typ, config, cache)
	if err != nil {
		return nil, err
	}
	// now we've built it, acquire the write lock to publish it
	c.mutex.Lock()
	defer c.mutex.Unlock()
	// check again because it could have been created while trading locks
	_, cached = c.data[name]
	if cached {
		return nil, ErrAlreadyDefined
	}
	c.data[name] = newItem
	return newItem, nil
}
@ -0,0 +1,89 @@ |
||||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package registry |
||||
|
||||
import ( |
||||
"fmt" |
||||
|
||||
"github.com/blevesearch/bleve/analysis" |
||||
) |
||||
|
||||
func RegisterCharFilter(name string, constructor CharFilterConstructor) { |
||||
_, exists := charFilters[name] |
||||
if exists { |
||||
panic(fmt.Errorf("attempted to register duplicate char filter named '%s'", name)) |
||||
} |
||||
charFilters[name] = constructor |
||||
} |
||||
|
||||
// CharFilterConstructor builds an analysis.CharFilter from the given
// configuration, resolving any dependencies through cache.
type CharFilterConstructor func(config map[string]interface{}, cache *Cache) (analysis.CharFilter, error)

// CharFilterRegistry maps a registered char filter name to its constructor.
type CharFilterRegistry map[string]CharFilterConstructor
||||
|
||||
// CharFilterCache caches built char filters by name; the embedded
// ConcurrentCache provides the synchronized storage.
type CharFilterCache struct {
	*ConcurrentCache
}
||||
|
||||
func NewCharFilterCache() *CharFilterCache { |
||||
return &CharFilterCache{ |
||||
NewConcurrentCache(), |
||||
} |
||||
} |
||||
|
||||
func CharFilterBuild(name string, config map[string]interface{}, cache *Cache) (interface{}, error) { |
||||
cons, registered := charFilters[name] |
||||
if !registered { |
||||
return nil, fmt.Errorf("no char filter with name or type '%s' registered", name) |
||||
} |
||||
charFilter, err := cons(config, cache) |
||||
if err != nil { |
||||
return nil, fmt.Errorf("error building char filter: %v", err) |
||||
} |
||||
return charFilter, nil |
||||
} |
||||
|
||||
func (c *CharFilterCache) CharFilterNamed(name string, cache *Cache) (analysis.CharFilter, error) { |
||||
item, err := c.ItemNamed(name, cache, CharFilterBuild) |
||||
if err != nil { |
||||
return nil, err |
||||
} |
||||
return item.(analysis.CharFilter), nil |
||||
} |
||||
|
||||
func (c *CharFilterCache) DefineCharFilter(name string, typ string, config map[string]interface{}, cache *Cache) (analysis.CharFilter, error) { |
||||
item, err := c.DefineItem(name, typ, config, cache, CharFilterBuild) |
||||
if err != nil { |
||||
if err == ErrAlreadyDefined { |
||||
return nil, fmt.Errorf("char filter named '%s' already defined", name) |
||||
} |
||||
return nil, err |
||||
} |
||||
return item.(analysis.CharFilter), nil |
||||
} |
||||
|
||||
func CharFilterTypesAndInstances() ([]string, []string) { |
||||
emptyConfig := map[string]interface{}{} |
||||
emptyCache := NewCache() |
||||
var types []string |
||||
var instances []string |
||||
for name, cons := range charFilters { |
||||
_, err := cons(emptyConfig, emptyCache) |
||||
if err == nil { |
||||
instances = append(instances, name) |
||||
} else { |
||||
types = append(types, name) |
||||
} |
||||
} |
||||
return types, instances |
||||
} |
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in new issue