gotosocial/vendor/github.com/golang/geo/s2/lexicon.go

// Copyright 2020 Google Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package s2

import (
	"encoding/binary"
	"hash/adler32"
	"math"
	"sort"
)

// TODO(roberts): If any of these are worth making public, change the
// method signatures and type names.

// emptySetID is the ID that idSetLexicon.add returns for the empty set, and
// that idSetLexicon.idSet maps back to an empty slice. It is the most negative
// int32. Multi-element sets receive the bitwise complement of a sequence ID
// that counts up from zero (-1, -2, ...), so this value represents the last ID
// that will ever be generated.
// (Non-negative IDs are reserved for singleton sets.)
var emptySetID = int32(math.MinInt32)

// idSetLexicon compactly represents a set of non-negative
// integers such as array indices ("ID sets"). It is especially suitable when
// either (1) there are many duplicate sets, or (2) there are many singleton
// or empty sets. See also sequenceLexicon.
//
// Each distinct ID set is mapped to a 32-bit integer. Empty and singleton
// sets take up no additional space; the set itself is represented
// by the unique ID assigned to the set. Duplicate sets are automatically
// eliminated. Note also that ID sets are referred to using 32-bit integers
// rather than pointers.
//
// ID layout, as produced by add and decoded by idSet:
//   - a non-negative ID is a singleton set whose only element is the ID itself;
//   - emptySetID is the empty set;
//   - any other negative ID is the bitwise complement of an ID issued by the
//     underlying sequenceLexicon.
type idSetLexicon struct {
	// idSets stores the sorted, de-duplicated contents of every set that is
	// neither empty nor a singleton.
	idSets *sequenceLexicon
}

// newIDSetLexicon returns an empty idSetLexicon backed by a freshly created
// sequenceLexicon.
func newIDSetLexicon() *idSetLexicon {
	lex := new(idSetLexicon)
	lex.idSets = newSequenceLexicon()
	return lex
}

// add adds the given set of integers to the lexicon if it is not already
// present, and returns the unique ID for this set. The values are automatically
// sorted and duplicates are removed; the caller's slice is not modified.
//
// The primary differences between this and sequenceLexicon are:
// 1. Empty and singleton sets are represented implicitly; they use no space.
// 2. Sets are represented rather than sequences; the ordering of values is
//    not important and duplicates are removed.
// 3. The values must be 32-bit non-negative integers only.
//
// The returned ID is emptySetID for no values, the element itself for a set
// with one distinct element, and otherwise the bitwise complement of the ID
// issued by the underlying sequenceLexicon.
func (l *idSetLexicon) add(ids ...int32) int32 {
	// Empty sets have a special ID chosen not to conflict with other IDs.
	if len(ids) == 0 {
		return emptySetID
	}

	// Singleton sets are represented by their element.
	if len(ids) == 1 {
		return ids[0]
	}

	// Canonicalize the set by sorting and removing duplicates.
	//
	// Creates a new slice in order to not alter the supplied values.
	set := uniqueInt32s(ids)

	// Input that only repeats one value (e.g. 5, 5) is still a singleton set.
	// Give it the same implicit ID that add(5) yields, so that one set never
	// ends up with two different IDs.
	if len(set) == 1 {
		return set[0]
	}

	// Non-singleton sets are represented by the bitwise complement of the ID
	// returned by the sequenceLexicon
	return ^l.idSets.add(set)
}

// idSet decodes an ID previously returned by add back into its set of
// integers.
//
// A non-negative ID is its own single element, emptySetID yields an empty
// (non-nil) slice, and any other negative ID is complemented and looked up in
// the underlying sequenceLexicon.
func (l *idSetLexicon) idSet(setID int32) []int32 {
	switch {
	case setID >= 0:
		// Singleton: the ID is the element.
		return []int32{setID}
	case setID == emptySetID:
		return []int32{}
	default:
		// Undo the complement applied by add to recover the sequence ID.
		seqID := ^setID
		return l.idSets.sequence(seqID)
	}
}

// clear removes all stored sets by clearing the underlying sequenceLexicon.
func (l *idSetLexicon) clear() {
	l.idSets.clear()
}

// sequenceLexicon compactly represents a sequence of values (e.g., tuples).
// It automatically eliminates duplicate slices, and maps the remaining
// sequences to sequentially increasing integer IDs. See also idSetLexicon.
//
// Each distinct sequence is mapped to a 32-bit integer.
type sequenceLexicon struct {
	// values holds the elements of every stored sequence, concatenated in the
	// order the sequences were added.
	values []int32
	// begins holds offsets into values: the sequence with ID i occupies
	// values[begins[i]:begins[i+1]]. It always starts with a single 0, so it
	// has one more entry than there are stored sequences.
	begins []uint32

	// idSet maps a hash-derived key for each stored sequence to that
	// sequence's index (ID) in the lexicon.
	idSet map[uint32]int32
}

// newSequenceLexicon returns an empty sequenceLexicon: no values, a begins
// list holding only the initial 0 offset, and an empty hash index.
func newSequenceLexicon() *sequenceLexicon {
	lex := new(sequenceLexicon)
	lex.begins = []uint32{0}
	lex.idSet = make(map[uint32]int32)
	return lex
}

// clear discards every stored sequence, returning the lexicon to the same
// state as one freshly created by newSequenceLexicon.
func (l *sequenceLexicon) clear() {
	*l = sequenceLexicon{
		begins: []uint32{0},
		idSet:  make(map[uint32]int32),
	}
}

// add adds the given sequence to the lexicon if it is not already present, and
// returns its ID. IDs are assigned sequentially starting from zero.
//
// A hash match alone is not treated as proof that the sequence is present:
// the stored values are compared element by element. Distinct sequences can
// share a 32-bit hash (for example [0, 512] and [0, 65537] do under hashSet),
// and handing both the same ID would silently drop the second one. When a key
// is taken by a different sequence, the next key (hash+1, hash+2, ...) is
// tried. Entries are never removed individually, so a lookup that follows the
// same probe order always finds a previously stored sequence.
func (l *sequenceLexicon) add(ids []int32) int32 {
	key := hashSet(ids)
	for {
		id, found := l.idSet[key]
		if !found {
			// Free slot: the sequence has not been stored yet.
			break
		}

		stored := l.values[l.begins[id]:l.begins[id+1]]
		if len(stored) == len(ids) {
			match := true
			for i := range stored {
				if stored[i] != ids[i] {
					match = false
					break
				}
			}
			if match {
				return id
			}
		}

		// Same key, different contents: probe the next key. uint32
		// wrap-around is fine; the key space is only being walked.
		key++
	}

	l.values = append(l.values, ids...)
	l.begins = append(l.begins, uint32(len(l.values)))

	// begins has one more entry than there are sequences, and the new
	// sequence is the last one, hence the -2.
	id := int32(len(l.begins)) - 2
	l.idSet[key] = id

	return id
}

// sequence returns the values that were stored for the given ID. The result
// is a sub-slice of the lexicon's internal storage, not a copy.
func (l *sequenceLexicon) sequence(id int32) []int32 {
	start, end := l.begins[id], l.begins[id+1]
	return l.values[start:end]
}

// size reports how many sequences are currently stored in the lexicon.
func (l *sequenceLexicon) size() int {
	// begins always carries a leading 0 offset in addition to one entry per
	// stored sequence, so it is one longer than the sequence count.
	offsets := len(l.begins)
	return offsets - 1
}

// hashSet returns a 32-bit hash of the given sequence of int32s: the Adler-32
// checksum of the values laid out as consecutive little-endian 4-byte words.
func hashSet(s []int32) uint32 {
	// TODO(roberts): We just need a way to nicely hash all the values down to
	// a 32-bit value. To ensure no unnecessary dependencies we use the core
	// library types available to do this. Is there a better option?
	encoded := make([]byte, 4*len(s))
	for i, v := range s {
		binary.LittleEndian.PutUint32(encoded[4*i:], uint32(v))
	}
	return adler32.Checksum(encoded)
}

// uniqueInt32s returns a new slice holding the distinct values of in, in
// ascending order. The input slice is left untouched; an empty input yields a
// nil result.
func uniqueInt32s(in []int32) []int32 {
	// Sort a private copy so the caller's slice keeps its order.
	sorted := append([]int32(nil), in...)
	sort.Slice(sorted, func(a, b int) bool { return sorted[a] < sorted[b] })

	// After sorting, equal values are adjacent: keep only the first of each
	// run, compacting into the front of the same backing array.
	uniq := sorted[:0]
	for _, v := range sorted {
		if n := len(uniq); n > 0 && uniq[n-1] == v {
			continue
		}
		uniq = append(uniq, v)
	}
	return uniq
}
Grand test fixup (#138) * start fixing up tests * fix up tests + automate with drone * fiddle with linting * messing about with drone.yml * some more fiddling * hmmm * add cache * add vendor directory * verbose * ci updates * update some little things * update sig 2021-08-12 19:03:24 +00:00			`// Copyright 2020 Google Inc. All rights reserved.`
			`//`
			`// Licensed under the Apache License, Version 2.0 (the "License");`
			`// you may not use this file except in compliance with the License.`
			`// You may obtain a copy of the License at`
			`//`
			`// http://www.apache.org/licenses/LICENSE-2.0`
			`//`
			`// Unless required by applicable law or agreed to in writing, software`
			`// distributed under the License is distributed on an "AS IS" BASIS,`
			`// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.`
			`// See the License for the specific language governing permissions and`
			`// limitations under the License.`

			`package s2`

			`import (`
			`"encoding/binary"`
			`"hash/adler32"`
			`"math"`
			`"sort"`
			`)`

			`// TODO(roberts): If any of these are worth making public, change the`
			`// method signatures and type names.`

			`// emptySetID represents the last ID that will ever be generated.`
			`// (Non-negative IDs are reserved for singleton sets.)`
			`var emptySetID = int32(math.MinInt32)`

			`// idSetLexicon compactly represents a set of non-negative`
			`// integers such as array indices ("ID sets"). It is especially suitable when`
			`// either (1) there are many duplicate sets, or (2) there are many singleton`
			`// or empty sets. See also sequenceLexicon.`
			`//`
			`// Each distinct ID set is mapped to a 32-bit integer. Empty and singleton`
			`// sets take up no additional space; the set itself is represented`
			`// by the unique ID assigned to the set. Duplicate sets are automatically`
			`// eliminated. Note also that ID sets are referred to using 32-bit integers`
			`// rather than pointers.`
			`type idSetLexicon struct {`
			`idSets *sequenceLexicon`
			`}`

			`func newIDSetLexicon() *idSetLexicon {`
			`return &idSetLexicon{`
			`idSets: newSequenceLexicon(),`
			`}`
			`}`

			`// add adds the given set of integers to the lexicon if it is not already`
			`// present, and return the unique ID for this set. The values are automatically`
			`// sorted and duplicates are removed.`
			`//`
			`// The primary difference between this and sequenceLexicon are:`
			`// 1. Empty and singleton sets are represented implicitly; they use no space.`
			`// 2. Sets are represented rather than sequences; the ordering of values is`
			`// not important and duplicates are removed.`
			`// 3. The values must be 32-bit non-negative integers only.`
			`func (l *idSetLexicon) add(ids ...int32) int32 {`
			`// Empty sets have a special ID chosen not to conflict with other IDs.`
			`if len(ids) == 0 {`
			`return emptySetID`
			`}`

			`// Singleton sets are represented by their element.`
			`if len(ids) == 1 {`
			`return ids[0]`
			`}`

			`// Canonicalize the set by sorting and removing duplicates.`
			`//`
			`// Creates a new slice in order to not alter the supplied values.`
			`set := uniqueInt32s(ids)`

			`// Non-singleton sets are represented by the bitwise complement of the ID`
			`// returned by the sequenceLexicon`
			`return ^l.idSets.add(set)`
			`}`

			`// idSet returns the set of integers corresponding to an ID returned by add.`
			`func (l *idSetLexicon) idSet(setID int32) []int32 {`
			`if setID >= 0 {`
			`return []int32{setID}`
			`}`
			`if setID == emptySetID {`
			`return []int32{}`
			`}`

			`return l.idSets.sequence(^setID)`
			`}`

			`func (l *idSetLexicon) clear() {`
			`l.idSets.clear()`
			`}`

			`// sequenceLexicon compactly represents a sequence of values (e.g., tuples).`
			`// It automatically eliminates duplicates slices, and maps the remaining`
			`// sequences to sequentially increasing integer IDs. See also idSetLexicon.`
			`//`
			`// Each distinct sequence is mapped to a 32-bit integer.`
			`type sequenceLexicon struct {`
			`values []int32`
			`begins []uint32`

			`// idSet is a mapping of a sequence hash to sequence index in the lexicon.`
			`idSet map[uint32]int32`
			`}`

			`func newSequenceLexicon() *sequenceLexicon {`
			`return &sequenceLexicon{`
			`begins: []uint32{0},`
			`idSet: make(map[uint32]int32),`
			`}`
			`}`

			`// clears all data from the lexicon.`
			`func (l *sequenceLexicon) clear() {`
			`l.values = nil`
			`l.begins = []uint32{0}`
			`l.idSet = make(map[uint32]int32)`
			`}`

			`// add adds the given value to the lexicon if it is not already present, and`
			`// returns its ID. IDs are assigned sequentially starting from zero.`
			`func (l *sequenceLexicon) add(ids []int32) int32 {`
			`if id, ok := l.idSet[hashSet(ids)]; ok {`
			`return id`
			`}`
			`for _, v := range ids {`
			`l.values = append(l.values, v)`
			`}`
			`l.begins = append(l.begins, uint32(len(l.values)))`

			`id := int32(len(l.begins)) - 2`
			`l.idSet[hashSet(ids)] = id`

			`return id`
			`}`

			`// sequence returns the original sequence of values for the given ID.`
			`func (l *sequenceLexicon) sequence(id int32) []int32 {`
			`return l.values[l.begins[id]:l.begins[id+1]]`
			`}`

			`// size reports the number of value sequences in the lexicon.`
			`func (l *sequenceLexicon) size() int {`
			`// Subtract one because the list of begins starts out with the first element set to 0.`
			`return len(l.begins) - 1`
			`}`

			`// hash returns a hash of this sequence of int32s.`
			`func hashSet(s []int32) uint32 {`
			`// TODO(roberts): We just need a way to nicely hash all the values down to`
			`// a 32-bit value. To ensure no unnecessary dependencies we use the core`
			`// library types available to do this. Is there a better option?`
			`a := adler32.New()`
			`binary.Write(a, binary.LittleEndian, s)`
			`return a.Sum32()`
			`}`

			`// uniqueInt32s returns the sorted and uniqued set of int32s from the input.`
			`func uniqueInt32s(in []int32) []int32 {`
			`var vals []int32`
			`m := make(map[int32]bool)`
			`for _, i := range in {`
			`if m[i] {`
			`continue`
			`}`
			`m[i] = true`
			`vals = append(vals, i)`
			`}`
			`sort.Slice(vals, func(i, j int) bool { return vals[i] < vals[j] })`
			`return vals`
			`}`