forked from mirror/go-ethereum
accounts:smartcard wallet without the dependency on libpcsclite (#19273)
* accounts, core, internal, node: Add support for smartcard wallets * accounts, internal: Changes in response to review * vendor: pull in missing go-echd library * accounts/scwallet, console: user friendly card opening * accounts/scwallet: ordered wallets, tighter events, derivation logs * accounts, console: frendly card errors, support pin unblock * accounts/scwallet: fix crypto API change * accounts/scwallet: rebase and update * Fix some linter issues * Remove the direct dependency on libpcsclite Instead, use a go library that communicates with pcscd over a socket. Also update the changes introduced by @gravityblast since this PR's inception * Temporary fix to the ADBU status call * fix wallet status update This is a temporary fix, better checks need to be performed once the whole process has been validated. * Fix key derivation * Add some documentation * Update a comment to reflect the workings of the updated system * Vendor keycard-go/derivationpath * Formatting fixes * Add instructions on how to install the card * Achieve full transaction signature+sending * PK derivation has to be supported by the card * Fix linter issues * Upgrade to keycard app v2.1.1 * Set gballet as codeowner of the smartcard wallet dir * fix unnecessary condition linter warning * refuse to overwrite the master key of a previously initialized card * refresh the account list when initializing the card * Update the card preparation instructions based on review feedback * 'sanitize' JSON input Co-Authored-By: gballet <gballet@gmail.com> * Apply suggestions from code review Co-Authored-By: gballet <gballet@gmail.com> * fix a serialization error * more review feedback * More review feedback * Can now specify the number of empty accounts to derive * Fix rebase error: include norm package * Update bip-39 ref and remove ebfe/scard from vendor * Add missing dependencyChrisChinchilla-patch-3
commit
1fc3e44ffe
@ -0,0 +1,69 @@ |
||||
# Using the smartcard wallet |
||||
|
||||
## Requirements |
||||
|
||||
* A USB smartcard reader |
||||
* A keycard that supports the status app |
||||
* PCSCD version 4.3 running on your system. **Only version 4.3 is currently supported.** |
||||
|
||||
## Preparing the smartcard |
||||
|
||||
**WARNING: FOLLOWING THESE INSTRUCTIONS WILL DESTROY THE MASTER KEY ON YOUR CARD. ONLY PROCEED IF NO FUNDS ARE ASSOCIATED WITH THESE ACCOUNTS** |
||||
|
||||
You can use status' [keycard-cli](https://github.com/status-im/keycard-cli) and you should get version 2.1.1 of their [smartcard application](https://github.com/status-im/status-keycard/releases/download/2.1.1/keycard_v2.1.1.cap) |
||||
|
||||
You also need to make sure that the PCSC daemon is running on your system. |
||||
|
||||
Then, you can install the application to the card by typing: |
||||
|
||||
``` |
||||
keycard install -a keycard_v2.1.1.cap |
||||
``` |
||||
|
||||
Then you can initialize the application by typing: |
||||
|
||||
``` |
||||
keycard init |
||||
``` |
||||
|
||||
Then the card needs to be paired: |
||||
|
||||
``` |
||||
keycard pair |
||||
``` |
||||
|
||||
Finally, you need to have the card generate a new master key: |
||||
|
||||
``` |
||||
keycard shell <<END |
||||
keycard-select |
||||
keycard-set-pairing PAIRING_KEY PAIRING_INDEX |
||||
keycard-open-secure-channel |
||||
keycard-verify-pin CARD_PIN |
||||
keycard-generate-key |
||||
END |
||||
``` |
||||
|
||||
## Usage |
||||
|
||||
1. Start `geth` with the `console` command |
||||
2. Check the card's URL by checking `personal.listWallets`: |
||||
|
||||
``` |
||||
listWallets: [{ |
||||
status: "Online, can derive public keys", |
||||
url: "pcsc://a4d73015" |
||||
}] |
||||
``` |
||||
|
||||
3. Open the wallet, you will be prompted for your pairing password, then PIN: |
||||
|
||||
``` |
||||
personal.openWallet("pcsc://a4d73015") |
||||
``` |
||||
|
||||
4. Check that creation was successful by typing e.g. `personal`. Then use it like a regular wallet. |
||||
|
||||
## Known issues |
||||
|
||||
* Starting geth with a valid card seems to make firefox crash. |
@ -0,0 +1,87 @@ |
||||
// Copyright 2018 The go-ethereum Authors
|
||||
// This file is part of the go-ethereum library.
|
||||
//
|
||||
// The go-ethereum library is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Lesser General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// The go-ethereum library is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU Lesser General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU Lesser General Public License
|
||||
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
package scwallet |
||||
|
||||
import ( |
||||
"bytes" |
||||
"encoding/binary" |
||||
"fmt" |
||||
) |
||||
|
||||
// commandAPDU represents an application data unit sent to a smartcard.
type commandAPDU struct {
	Cla, Ins, P1, P2 uint8  // Class, Instruction, Parameter 1, Parameter 2
	Data             []byte // Command data
	Le               uint8  // Expected response length (Le); always written as the trailing byte
}

// serialize encodes the command APDU into its wire format:
//
//	Cla | Ins | P1 | P2 | [Lc | Data] | Le
//
// The Lc (data length) byte and the data itself are only emitted when Data is
// non-empty; Le is always emitted. Lc is a single byte, so a payload longer
// than 255 bytes cannot be represented and is rejected with an error instead
// of being silently truncated into a malformed command.
func (ca commandAPDU) serialize() ([]byte, error) {
	if len(ca.Data) > 0xff {
		return nil, fmt.Errorf("can not serialize data: payload too long (%d > 255)", len(ca.Data))
	}
	buf := new(bytes.Buffer)
	// Header (4) + optional Lc (1) + data + Le (1).
	buf.Grow(len(ca.Data) + 6)

	// Writes to a bytes.Buffer never return an error, so none are checked here.
	buf.Write([]byte{ca.Cla, ca.Ins, ca.P1, ca.P2})
	if len(ca.Data) > 0 {
		buf.WriteByte(uint8(len(ca.Data)))
		buf.Write(ca.Data)
	}
	buf.WriteByte(ca.Le)
	return buf.Bytes(), nil
}
||||
|
||||
// responseAPDU represents an application data unit received from a smart card.
type responseAPDU struct {
	Data     []byte // response data
	Sw1, Sw2 uint8  // status words 1 and 2
}

// deserialize parses a raw response into the receiver. The final two bytes
// are the status words Sw1 and Sw2; everything before them becomes Data.
// An error is returned if the payload is shorter than the two status bytes.
func (ra *responseAPDU) deserialize(data []byte) error {
	const statusLen = 2

	if len(data) < statusLen {
		return fmt.Errorf("can not deserialize data: payload too short (%d < 2)", len(data))
	}
	ra.Data = make([]byte, len(data)-statusLen)

	// Fill the fields in wire order: data first, then the two status bytes.
	reader := bytes.NewReader(data)
	for _, field := range []interface{}{&ra.Data, &ra.Sw1, &ra.Sw2} {
		if err := binary.Read(reader, binary.BigEndian, field); err != nil {
			return err
		}
	}
	return nil
}
@ -0,0 +1,302 @@ |
||||
// Copyright 2018 The go-ethereum Authors
|
||||
// This file is part of the go-ethereum library.
|
||||
//
|
||||
// The go-ethereum library is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Lesser General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// The go-ethereum library is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU Lesser General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU Lesser General Public License
|
||||
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
// This package implements support for smartcard-based hardware wallets such as
|
||||
// the one written by Status: https://github.com/status-im/hardware-wallet
|
||||
//
|
||||
// This implementation of smartcard wallets has a different interaction process
|
||||
// to other types of hardware wallet. The process works like this:
|
||||
//
|
||||
// 1. (First use with a given client) Establish a pairing between hardware
|
||||
// wallet and client. This requires a secret value called a 'pairing password'.
|
||||
// You can pair with an unpaired wallet with `personal.openWallet(URI, pairing password)`.
|
||||
// 2. (First use only) Initialize the wallet, which generates a keypair, stores
|
||||
// it on the wallet, and returns it so the user can back it up. You can
|
||||
// initialize a wallet with `personal.initializeWallet(URI)`.
|
||||
// 3. Connect to the wallet using the pairing information established in step 1.
|
||||
// You can connect to a paired wallet with `personal.openWallet(URI, PIN)`.
|
||||
// 4. Interact with the wallet as normal.
|
||||
|
||||
package scwallet |
||||
|
||||
import ( |
||||
"encoding/json" |
||||
"io/ioutil" |
||||
"os" |
||||
"path/filepath" |
||||
"sort" |
||||
"sync" |
||||
"time" |
||||
|
||||
"github.com/ethereum/go-ethereum/accounts" |
||||
"github.com/ethereum/go-ethereum/common" |
||||
"github.com/ethereum/go-ethereum/event" |
||||
"github.com/ethereum/go-ethereum/log" |
||||
pcsc "github.com/gballet/go-libpcsclite" |
||||
) |
||||
|
||||
const (
	// Scheme is the URI prefix for smartcard wallets.
	Scheme = "pcsc"

	// refreshCycle is the maximum time between wallet refreshes (if USB hotplug
	// notifications don't work).
	refreshCycle = time.Second

	// refreshThrottling is the minimum time between wallet refreshes to avoid thrashing.
	refreshThrottling = 500 * time.Millisecond
)
||||
|
||||
// smartcardPairing contains information about a smart card we have paired with
|
||||
// or might pair with the hub.
|
||||
type smartcardPairing struct { |
||||
PublicKey []byte `json:"publicKey"` |
||||
PairingIndex uint8 `json:"pairingIndex"` |
||||
PairingKey []byte `json:"pairingKey"` |
||||
Accounts map[common.Address]accounts.DerivationPath `json:"accounts"` |
||||
} |
||||
|
||||
// Hub is a accounts.Backend that can find and handle generic PC/SC hardware wallets.
|
||||
type Hub struct { |
||||
scheme string // Protocol scheme prefixing account and wallet URLs.
|
||||
|
||||
context *pcsc.Client |
||||
datadir string |
||||
pairings map[string]smartcardPairing |
||||
|
||||
refreshed time.Time // Time instance when the list of wallets was last refreshed
|
||||
wallets map[string]*Wallet // Mapping from reader names to wallet instances
|
||||
updateFeed event.Feed // Event feed to notify wallet additions/removals
|
||||
updateScope event.SubscriptionScope // Subscription scope tracking current live listeners
|
||||
updating bool // Whether the event notification loop is running
|
||||
|
||||
quit chan chan error |
||||
|
||||
stateLock sync.RWMutex // Protects the internals of the hub from racey access
|
||||
} |
||||
|
||||
func (hub *Hub) readPairings() error { |
||||
hub.pairings = make(map[string]smartcardPairing) |
||||
pairingFile, err := os.Open(filepath.Join(hub.datadir, "smartcards.json")) |
||||
if err != nil { |
||||
if os.IsNotExist(err) { |
||||
return nil |
||||
} |
||||
return err |
||||
} |
||||
|
||||
pairingData, err := ioutil.ReadAll(pairingFile) |
||||
if err != nil { |
||||
return err |
||||
} |
||||
var pairings []smartcardPairing |
||||
if err := json.Unmarshal(pairingData, &pairings); err != nil { |
||||
return err |
||||
} |
||||
|
||||
for _, pairing := range pairings { |
||||
hub.pairings[string(pairing.PublicKey)] = pairing |
||||
} |
||||
return nil |
||||
} |
||||
|
||||
func (hub *Hub) writePairings() error { |
||||
pairingFile, err := os.OpenFile(filepath.Join(hub.datadir, "smartcards.json"), os.O_RDWR|os.O_CREATE, 0755) |
||||
if err != nil { |
||||
return err |
||||
} |
||||
defer pairingFile.Close() |
||||
|
||||
pairings := make([]smartcardPairing, 0, len(hub.pairings)) |
||||
for _, pairing := range hub.pairings { |
||||
pairings = append(pairings, pairing) |
||||
} |
||||
|
||||
pairingData, err := json.Marshal(pairings) |
||||
if err != nil { |
||||
return err |
||||
} |
||||
|
||||
if _, err := pairingFile.Write(pairingData); err != nil { |
||||
return err |
||||
} |
||||
|
||||
return nil |
||||
} |
||||
|
||||
func (hub *Hub) pairing(wallet *Wallet) *smartcardPairing { |
||||
if pairing, ok := hub.pairings[string(wallet.PublicKey)]; ok { |
||||
return &pairing |
||||
} |
||||
return nil |
||||
} |
||||
|
||||
func (hub *Hub) setPairing(wallet *Wallet, pairing *smartcardPairing) error { |
||||
if pairing == nil { |
||||
delete(hub.pairings, string(wallet.PublicKey)) |
||||
} else { |
||||
hub.pairings[string(wallet.PublicKey)] = *pairing |
||||
} |
||||
return hub.writePairings() |
||||
} |
||||
|
||||
// NewHub creates a new hardware wallet manager for smartcards.
|
||||
func NewHub(scheme string, datadir string) (*Hub, error) { |
||||
context, err := pcsc.EstablishContext(pcsc.ScopeSystem) |
||||
if err != nil { |
||||
return nil, err |
||||
} |
||||
hub := &Hub{ |
||||
scheme: scheme, |
||||
context: context, |
||||
datadir: datadir, |
||||
wallets: make(map[string]*Wallet), |
||||
quit: make(chan chan error), |
||||
} |
||||
if err := hub.readPairings(); err != nil { |
||||
return nil, err |
||||
} |
||||
hub.refreshWallets() |
||||
return hub, nil |
||||
} |
||||
|
||||
// Wallets implements accounts.Backend, returning all the currently tracked smart
|
||||
// cards that appear to be hardware wallets.
|
||||
func (hub *Hub) Wallets() []accounts.Wallet { |
||||
// Make sure the list of wallets is up to date
|
||||
hub.refreshWallets() |
||||
|
||||
hub.stateLock.RLock() |
||||
defer hub.stateLock.RUnlock() |
||||
|
||||
cpy := make([]accounts.Wallet, 0, len(hub.wallets)) |
||||
for _, wallet := range hub.wallets { |
||||
cpy = append(cpy, wallet) |
||||
} |
||||
sort.Sort(accounts.WalletsByURL(cpy)) |
||||
return cpy |
||||
} |
||||
|
||||
// refreshWallets scans the devices attached to the machine and updates the
|
||||
// list of wallets based on the found devices.
|
||||
func (hub *Hub) refreshWallets() { |
||||
// Don't scan the USB like crazy it the user fetches wallets in a loop
|
||||
hub.stateLock.RLock() |
||||
elapsed := time.Since(hub.refreshed) |
||||
hub.stateLock.RUnlock() |
||||
|
||||
if elapsed < refreshThrottling { |
||||
return |
||||
} |
||||
// Retrieve all the smart card reader to check for cards
|
||||
readers, err := hub.context.ListReaders() |
||||
if err != nil { |
||||
// This is a perverted hack, the scard library returns an error if no card
|
||||
// readers are present instead of simply returning an empty list. We don't
|
||||
// want to fill the user's log with errors, so filter those out.
|
||||
if err.Error() != "scard: Cannot find a smart card reader." { |
||||
log.Error("Failed to enumerate smart card readers", "err", err) |
||||
return |
||||
} |
||||
} |
||||
// Transform the current list of wallets into the new one
|
||||
hub.stateLock.Lock() |
||||
|
||||
events := []accounts.WalletEvent{} |
||||
seen := make(map[string]struct{}) |
||||
|
||||
for _, reader := range readers { |
||||
// Mark the reader as present
|
||||
seen[reader] = struct{}{} |
||||
|
||||
// If we alreay know about this card, skip to the next reader, otherwise clean up
|
||||
if wallet, ok := hub.wallets[reader]; ok { |
||||
if err := wallet.ping(); err == nil { |
||||
continue |
||||
} |
||||
wallet.Close() |
||||
events = append(events, accounts.WalletEvent{Wallet: wallet, Kind: accounts.WalletDropped}) |
||||
delete(hub.wallets, reader) |
||||
} |
||||
// New card detected, try to connect to it
|
||||
card, err := hub.context.Connect(reader, pcsc.ShareShared, pcsc.ProtocolAny) |
||||
if err != nil { |
||||
log.Debug("Failed to open smart card", "reader", reader, "err", err) |
||||
continue |
||||
} |
||||
wallet := NewWallet(hub, card) |
||||
if err = wallet.connect(); err != nil { |
||||
log.Debug("Failed to connect to smart card", "reader", reader, "err", err) |
||||
card.Disconnect(pcsc.LeaveCard) |
||||
continue |
||||
} |
||||
// Card connected, start tracking in amongs the wallets
|
||||
hub.wallets[reader] = wallet |
||||
events = append(events, accounts.WalletEvent{Wallet: wallet, Kind: accounts.WalletArrived}) |
||||
} |
||||
// Remove any wallets no longer present
|
||||
for reader, wallet := range hub.wallets { |
||||
if _, ok := seen[reader]; !ok { |
||||
wallet.Close() |
||||
events = append(events, accounts.WalletEvent{Wallet: wallet, Kind: accounts.WalletDropped}) |
||||
delete(hub.wallets, reader) |
||||
} |
||||
} |
||||
hub.refreshed = time.Now() |
||||
hub.stateLock.Unlock() |
||||
|
||||
for _, event := range events { |
||||
hub.updateFeed.Send(event) |
||||
} |
||||
} |
||||
|
||||
// Subscribe implements accounts.Backend, creating an async subscription to
|
||||
// receive notifications on the addition or removal of smart card wallets.
|
||||
func (hub *Hub) Subscribe(sink chan<- accounts.WalletEvent) event.Subscription { |
||||
// We need the mutex to reliably start/stop the update loop
|
||||
hub.stateLock.Lock() |
||||
defer hub.stateLock.Unlock() |
||||
|
||||
// Subscribe the caller and track the subscriber count
|
||||
sub := hub.updateScope.Track(hub.updateFeed.Subscribe(sink)) |
||||
|
||||
// Subscribers require an active notification loop, start it
|
||||
if !hub.updating { |
||||
hub.updating = true |
||||
go hub.updater() |
||||
} |
||||
return sub |
||||
} |
||||
|
||||
// updater is responsible for maintaining an up-to-date list of wallets managed
|
||||
// by the smart card hub, and for firing wallet addition/removal events.
|
||||
func (hub *Hub) updater() { |
||||
for { |
||||
// TODO: Wait for a USB hotplug event (not supported yet) or a refresh timeout
|
||||
// <-hub.changes
|
||||
time.Sleep(refreshCycle) |
||||
|
||||
// Run the wallet refresher
|
||||
hub.refreshWallets() |
||||
|
||||
// If all our subscribers left, stop the updater
|
||||
hub.stateLock.Lock() |
||||
if hub.updateScope.Count() == 0 { |
||||
hub.updating = false |
||||
hub.stateLock.Unlock() |
||||
return |
||||
} |
||||
hub.stateLock.Unlock() |
||||
} |
||||
} |
@ -0,0 +1,346 @@ |
||||
// Copyright 2018 The go-ethereum Authors
|
||||
// This file is part of the go-ethereum library.
|
||||
//
|
||||
// The go-ethereum library is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Lesser General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// The go-ethereum library is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU Lesser General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU Lesser General Public License
|
||||
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
package scwallet |
||||
|
||||
import ( |
||||
"bytes" |
||||
"crypto/aes" |
||||
"crypto/cipher" |
||||
"crypto/rand" |
||||
"crypto/sha256" |
||||
"crypto/sha512" |
||||
"fmt" |
||||
|
||||
"github.com/ethereum/go-ethereum/crypto" |
||||
pcsc "github.com/gballet/go-libpcsclite" |
||||
"github.com/wsddn/go-ecdh" |
||||
"golang.org/x/crypto/pbkdf2" |
||||
"golang.org/x/text/unicode/norm" |
||||
) |
||||
|
||||
// Limits and parameters of the pairing exchange.
const (
	maxPayloadSize  = 223 // Largest plaintext payload accepted for encryption
	pairP1FirstStep = 0   // P1 value of the first PAIR command
	pairP1LastStep  = 1   // P1 value of the final PAIR command
)

// Sizes, in bytes, used by the secure channel.
const (
	scSecretLength = 32
	scBlockSize    = 16
)

// Instruction bytes of the secure channel commands.
const (
	insOpenSecureChannel    = 0x10
	insMutuallyAuthenticate = 0x11
	insPair                 = 0x12
	insUnpair               = 0x13
)

// pairingSalt is the salt used when stretching the pairing password.
const pairingSalt = "Keycard Pairing Password Salt"
||||
|
||||
// SecureChannelSession enables secure communication with a hardware wallet.
|
||||
type SecureChannelSession struct { |
||||
card *pcsc.Card // A handle to the smartcard for communication
|
||||
secret []byte // A shared secret generated from our ECDSA keys
|
||||
publicKey []byte // Our own ephemeral public key
|
||||
PairingKey []byte // A permanent shared secret for a pairing, if present
|
||||
sessionEncKey []byte // The current session encryption key
|
||||
sessionMacKey []byte // The current session MAC key
|
||||
iv []byte // The current IV
|
||||
PairingIndex uint8 // The pairing index
|
||||
} |
||||
|
||||
// NewSecureChannelSession creates a new secure channel for the given card and public key.
|
||||
func NewSecureChannelSession(card *pcsc.Card, keyData []byte) (*SecureChannelSession, error) { |
||||
// Generate an ECDSA keypair for ourselves
|
||||
gen := ecdh.NewEllipticECDH(crypto.S256()) |
||||
private, public, err := gen.GenerateKey(rand.Reader) |
||||
if err != nil { |
||||
return nil, err |
||||
} |
||||
|
||||
cardPublic, ok := gen.Unmarshal(keyData) |
||||
if !ok { |
||||
return nil, fmt.Errorf("Could not unmarshal public key from card") |
||||
} |
||||
|
||||
secret, err := gen.GenerateSharedSecret(private, cardPublic) |
||||
if err != nil { |
||||
return nil, err |
||||
} |
||||
|
||||
return &SecureChannelSession{ |
||||
card: card, |
||||
secret: secret, |
||||
publicKey: gen.Marshal(public), |
||||
}, nil |
||||
} |
||||
|
||||
// Pair establishes a new pairing with the smartcard.
|
||||
func (s *SecureChannelSession) Pair(pairingPassword []byte) error { |
||||
secretHash := pbkdf2.Key(norm.NFKD.Bytes(pairingPassword), norm.NFKD.Bytes([]byte(pairingSalt)), 50000, 32, sha256.New) |
||||
|
||||
challenge := make([]byte, 32) |
||||
if _, err := rand.Read(challenge); err != nil { |
||||
return err |
||||
} |
||||
|
||||
response, err := s.pair(pairP1FirstStep, challenge) |
||||
if err != nil { |
||||
return err |
||||
} |
||||
|
||||
md := sha256.New() |
||||
md.Write(secretHash[:]) |
||||
md.Write(challenge) |
||||
|
||||
expectedCryptogram := md.Sum(nil) |
||||
cardCryptogram := response.Data[:32] |
||||
cardChallenge := response.Data[32:64] |
||||
|
||||
if !bytes.Equal(expectedCryptogram, cardCryptogram) { |
||||
return fmt.Errorf("Invalid card cryptogram %v != %v", expectedCryptogram, cardCryptogram) |
||||
} |
||||
|
||||
md.Reset() |
||||
md.Write(secretHash[:]) |
||||
md.Write(cardChallenge) |
||||
response, err = s.pair(pairP1LastStep, md.Sum(nil)) |
||||
if err != nil { |
||||
return err |
||||
} |
||||
|
||||
md.Reset() |
||||
md.Write(secretHash[:]) |
||||
md.Write(response.Data[1:]) |
||||
s.PairingKey = md.Sum(nil) |
||||
s.PairingIndex = response.Data[0] |
||||
|
||||
return nil |
||||
} |
||||
|
||||
// Unpair disestablishes an existing pairing.
|
||||
func (s *SecureChannelSession) Unpair() error { |
||||
if s.PairingKey == nil { |
||||
return fmt.Errorf("Cannot unpair: not paired") |
||||
} |
||||
|
||||
_, err := s.transmitEncrypted(claSCWallet, insUnpair, s.PairingIndex, 0, []byte{}) |
||||
if err != nil { |
||||
return err |
||||
} |
||||
s.PairingKey = nil |
||||
// Close channel
|
||||
s.iv = nil |
||||
return nil |
||||
} |
||||
|
||||
// Open initializes the secure channel.
|
||||
func (s *SecureChannelSession) Open() error { |
||||
if s.iv != nil { |
||||
return fmt.Errorf("Session already opened") |
||||
} |
||||
|
||||
response, err := s.open() |
||||
if err != nil { |
||||
return err |
||||
} |
||||
|
||||
// Generate the encryption/mac key by hashing our shared secret,
|
||||
// pairing key, and the first bytes returned from the Open APDU.
|
||||
md := sha512.New() |
||||
md.Write(s.secret) |
||||
md.Write(s.PairingKey) |
||||
md.Write(response.Data[:scSecretLength]) |
||||
keyData := md.Sum(nil) |
||||
s.sessionEncKey = keyData[:scSecretLength] |
||||
s.sessionMacKey = keyData[scSecretLength : scSecretLength*2] |
||||
|
||||
// The IV is the last bytes returned from the Open APDU.
|
||||
s.iv = response.Data[scSecretLength:] |
||||
|
||||
return s.mutuallyAuthenticate() |
||||
} |
||||
|
||||
// mutuallyAuthenticate is an internal method to authenticate both ends of the
|
||||
// connection.
|
||||
func (s *SecureChannelSession) mutuallyAuthenticate() error { |
||||
data := make([]byte, scSecretLength) |
||||
if _, err := rand.Read(data); err != nil { |
||||
return err |
||||
} |
||||
|
||||
response, err := s.transmitEncrypted(claSCWallet, insMutuallyAuthenticate, 0, 0, data) |
||||
if err != nil { |
||||
return err |
||||
} |
||||
if response.Sw1 != 0x90 || response.Sw2 != 0x00 { |
||||
return fmt.Errorf("Got unexpected response from MUTUALLY_AUTHENTICATE: 0x%x%x", response.Sw1, response.Sw2) |
||||
} |
||||
|
||||
if len(response.Data) != scSecretLength { |
||||
return fmt.Errorf("Response from MUTUALLY_AUTHENTICATE was %d bytes, expected %d", len(response.Data), scSecretLength) |
||||
} |
||||
|
||||
return nil |
||||
} |
||||
|
||||
// open is an internal method that sends an open APDU.
|
||||
func (s *SecureChannelSession) open() (*responseAPDU, error) { |
||||
return transmit(s.card, &commandAPDU{ |
||||
Cla: claSCWallet, |
||||
Ins: insOpenSecureChannel, |
||||
P1: s.PairingIndex, |
||||
P2: 0, |
||||
Data: s.publicKey, |
||||
Le: 0, |
||||
}) |
||||
} |
||||
|
||||
// pair is an internal method that sends a pair APDU.
|
||||
func (s *SecureChannelSession) pair(p1 uint8, data []byte) (*responseAPDU, error) { |
||||
return transmit(s.card, &commandAPDU{ |
||||
Cla: claSCWallet, |
||||
Ins: insPair, |
||||
P1: p1, |
||||
P2: 0, |
||||
Data: data, |
||||
Le: 0, |
||||
}) |
||||
} |
||||
|
||||
// transmitEncrypted sends an encrypted message, and decrypts and returns the response.
|
||||
func (s *SecureChannelSession) transmitEncrypted(cla, ins, p1, p2 byte, data []byte) (*responseAPDU, error) { |
||||
if s.iv == nil { |
||||
return nil, fmt.Errorf("Channel not open") |
||||
} |
||||
|
||||
data, err := s.encryptAPDU(data) |
||||
if err != nil { |
||||
return nil, err |
||||
} |
||||
meta := [16]byte{cla, ins, p1, p2, byte(len(data) + scBlockSize)} |
||||
if err = s.updateIV(meta[:], data); err != nil { |
||||
return nil, err |
||||
} |
||||
|
||||
fulldata := make([]byte, len(s.iv)+len(data)) |
||||
copy(fulldata, s.iv) |
||||
copy(fulldata[len(s.iv):], data) |
||||
|
||||
response, err := transmit(s.card, &commandAPDU{ |
||||
Cla: cla, |
||||
Ins: ins, |
||||
P1: p1, |
||||
P2: p2, |
||||
Data: fulldata, |
||||
}) |
||||
if err != nil { |
||||
return nil, err |
||||
} |
||||
|
||||
rmeta := [16]byte{byte(len(response.Data))} |
||||
rmac := response.Data[:len(s.iv)] |
||||
rdata := response.Data[len(s.iv):] |
||||
plainData, err := s.decryptAPDU(rdata) |
||||
if err != nil { |
||||
return nil, err |
||||
} |
||||
|
||||
if err = s.updateIV(rmeta[:], rdata); err != nil { |
||||
return nil, err |
||||
} |
||||
if !bytes.Equal(s.iv, rmac) { |
||||
return nil, fmt.Errorf("Invalid MAC in response") |
||||
} |
||||
|
||||
rapdu := &responseAPDU{} |
||||
rapdu.deserialize(plainData) |
||||
|
||||
if rapdu.Sw1 != sw1Ok { |
||||
return nil, fmt.Errorf("Unexpected response status Cla=0x%x, Ins=0x%x, Sw=0x%x%x", cla, ins, rapdu.Sw1, rapdu.Sw2) |
||||
} |
||||
|
||||
return rapdu, nil |
||||
} |
||||
|
||||
// encryptAPDU is an internal method that serializes and encrypts an APDU.
|
||||
func (s *SecureChannelSession) encryptAPDU(data []byte) ([]byte, error) { |
||||
if len(data) > maxPayloadSize { |
||||
return nil, fmt.Errorf("Payload of %d bytes exceeds maximum of %d", len(data), maxPayloadSize) |
||||
} |
||||
data = pad(data, 0x80) |
||||
|
||||
ret := make([]byte, len(data)) |
||||
|
||||
a, err := aes.NewCipher(s.sessionEncKey) |
||||
if err != nil { |
||||
return nil, err |
||||
} |
||||
crypter := cipher.NewCBCEncrypter(a, s.iv) |
||||
crypter.CryptBlocks(ret, data) |
||||
return ret, nil |
||||
} |
||||
|
||||
// pad applies message padding to a 16 byte boundary. The result is a new slice
// holding data, followed by the terminator byte, followed by zeros. At least
// one byte is always added, so input that is already a multiple of 16 bytes
// long grows by a full 16 bytes.
func pad(data []byte, terminator byte) []byte {
	size := len(data) + 16 - len(data)%16
	out := make([]byte, size)
	n := copy(out, data)
	out[n] = terminator
	return out
}
||||
|
||||
// decryptAPDU is an internal method that decrypts and deserializes an APDU.
|
||||
func (s *SecureChannelSession) decryptAPDU(data []byte) ([]byte, error) { |
||||
a, err := aes.NewCipher(s.sessionEncKey) |
||||
if err != nil { |
||||
return nil, err |
||||
} |
||||
|
||||
ret := make([]byte, len(data)) |
||||
|
||||
crypter := cipher.NewCBCDecrypter(a, s.iv) |
||||
crypter.CryptBlocks(ret, data) |
||||
return unpad(ret, 0x80) |
||||
} |
||||
|
||||
// unpad strips padding from a message. It scans backwards over at most the
// final 16 bytes, skipping zero bytes, and returns everything in front of the
// first terminator byte it meets. Any other non-zero byte, or running out of
// bytes without finding the terminator, is an error.
//
// The scan is bounded by the length of the input, so empty or short all-zero
// input yields an error instead of indexing out of range.
func unpad(data []byte, terminator byte) ([]byte, error) {
	for i := 1; i <= 16 && i <= len(data); i++ {
		switch data[len(data)-i] {
		case 0:
			continue
		case terminator:
			return data[:len(data)-i], nil
		default:
			return nil, fmt.Errorf("Expected end of padding, got %d", data[len(data)-i])
		}
	}
	return nil, fmt.Errorf("Expected end of padding, got 0")
}
||||
|
||||
// updateIV is an internal method that updates the initialization vector after
|
||||
// each message exchanged.
|
||||
func (s *SecureChannelSession) updateIV(meta, data []byte) error { |
||||
data = pad(data, 0) |
||||
a, err := aes.NewCipher(s.sessionMacKey) |
||||
if err != nil { |
||||
return err |
||||
} |
||||
crypter := cipher.NewCBCEncrypter(a, make([]byte, 16)) |
||||
crypter.CryptBlocks(meta, meta) |
||||
crypter.CryptBlocks(data, data) |
||||
// The first 16 bytes of the last block is the MAC
|
||||
s.iv = data[len(data)-32 : len(data)-16] |
||||
return nil |
||||
} |
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,31 @@ |
||||
// Copyright 2018 The go-ethereum Authors
|
||||
// This file is part of the go-ethereum library.
|
||||
//
|
||||
// The go-ethereum library is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Lesser General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// The go-ethereum library is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU Lesser General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU Lesser General Public License
|
||||
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
package accounts |
||||
|
||||
// AccountsByURL implements sort.Interface for []Account based on the URL field.
|
||||
type AccountsByURL []Account |
||||
|
||||
func (a AccountsByURL) Len() int { return len(a) } |
||||
func (a AccountsByURL) Swap(i, j int) { a[i], a[j] = a[j], a[i] } |
||||
func (a AccountsByURL) Less(i, j int) bool { return a[i].URL.Cmp(a[j].URL) < 0 } |
||||
|
||||
// WalletsByURL implements sort.Interface for []Wallet based on the URL field.
|
||||
type WalletsByURL []Wallet |
||||
|
||||
func (w WalletsByURL) Len() int { return len(w) } |
||||
func (w WalletsByURL) Swap(i, j int) { w[i], w[j] = w[j], w[i] } |
||||
func (w WalletsByURL) Less(i, j int) bool { return w[i].URL().Cmp(w[j].URL()) < 0 } |
@ -0,0 +1,29 @@ |
||||
BSD 3-Clause License |
||||
|
||||
Copyright (c) 2019, Guillaume Ballet |
||||
All rights reserved. |
||||
|
||||
Redistribution and use in source and binary forms, with or without |
||||
modification, are permitted provided that the following conditions are met: |
||||
|
||||
* Redistributions of source code must retain the above copyright notice, this |
||||
list of conditions and the following disclaimer. |
||||
|
||||
* Redistributions in binary form must reproduce the above copyright notice, |
||||
this list of conditions and the following disclaimer in the documentation |
||||
and/or other materials provided with the distribution. |
||||
|
||||
* Neither the name of the copyright holder nor the names of its |
||||
contributors may be used to endorse or promote products derived from |
||||
this software without specific prior written permission. |
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE |
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR |
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER |
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, |
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
@ -0,0 +1,53 @@ |
||||
# go-libpcsclite |
||||
|
||||
A golang implementation of the [libpcsclite](http://github.com/LudovicRousseau/PCSC) client. It connects to the `pcscd` daemon over sockets. |
||||
|
||||
## Purpose |
||||
|
||||
The goal is for major open source projects to distribute a single binary that doesn't depend on `libpcsclite`. It provides an extra function `CheckPCSCDaemon` that will tell the user if `pcscd` is running. |
||||
|
||||
## Building |
||||
|
||||
TODO |
||||
|
||||
## Example |
||||
|
||||
TODO |
||||
|
||||
## TODO |
||||
|
||||
- [ ] Finish this README |
||||
- [ ] Lock context |
||||
- [ ] implement missing functions |
||||
|
||||
## License |
||||
|
||||
BSD 3-Clause License |
||||
|
||||
Copyright (c) 2019, Guillaume Ballet |
||||
All rights reserved. |
||||
|
||||
Redistribution and use in source and binary forms, with or without |
||||
modification, are permitted provided that the following conditions are met: |
||||
|
||||
* Redistributions of source code must retain the above copyright notice, this |
||||
list of conditions and the following disclaimer. |
||||
|
||||
* Redistributions in binary form must reproduce the above copyright notice, |
||||
this list of conditions and the following disclaimer in the documentation |
||||
and/or other materials provided with the distribution. |
||||
|
||||
* Neither the name of the copyright holder nor the names of its |
||||
contributors may be used to endorse or promote products derived from |
||||
this software without specific prior written permission. |
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE |
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR |
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER |
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, |
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
@ -0,0 +1,99 @@ |
||||
// BSD 3-Clause License
|
||||
//
|
||||
// Copyright (c) 2019, Guillaume Ballet
|
||||
// All rights reserved.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright notice, this
|
||||
// list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistributions in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * Neither the name of the copyright holder nor the names of its
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
package pcsc |
||||
|
||||
// Status codes, scopes, protocols, share modes, dispositions and card states
// exchanged with the PCSC daemon, plus the path of the daemon's socket.
const (
	SCardSuccess = 0x00000000 /* No error was encountered. */

	AutoAllocate  = -1     /* see SCardFreeMemory() */
	ScopeUser     = 0x0000 /* Scope in user space */
	ScopeTerminal = 0x0001 /* Scope in terminal */
	ScopeSystem   = 0x0002 /* Scope in system */
	ScopeGlobal   = 0x0003 /* Scope is global */

	ProtocolUndefined = 0x0000                    /* protocol not set */
	ProtocolUnSet     = ProtocolUndefined         /* backward compat */
	ProtocolT0        = 0x0001                    /* T=0 active protocol. */
	ProtocolT1        = 0x0002                    /* T=1 active protocol. */
	ProtocolRaw       = 0x0004                    /* Raw active protocol. */
	ProtocolT15       = 0x0008                    /* T=15 protocol. */
	ProtocolAny       = (ProtocolT0 | ProtocolT1) /* IFD determines prot. */

	ShareExclusive = 0x0001 /* Exclusive mode only */
	ShareShared    = 0x0002 /* Shared mode only */
	ShareDirect    = 0x0003 /* Raw mode only */

	LeaveCard   = 0x0000 /* Do nothing on close */
	ResetCard   = 0x0001 /* Reset on close */
	UnpowerCard = 0x0002 /* Power down on close */
	EjectCard   = 0x0003 /* Eject on close */

	SCardUnknown    = 0x0001 /* Unknown state */
	SCardAbsent     = 0x0002 /* Card is absent */
	SCardPresent    = 0x0004 /* Card is present */
	SCardSwallowed  = 0x0008 /* Card not powered */
	SCardPowered    = 0x0010 /* Card is powered */
	SCardNegotiable = 0x0020 /* Ready for PTS */
	SCardSpecific   = 0x0040 /* PTS has been set */

	// SCardPowever is the original, misspelled name of SCardPowered. It is
	// kept with the same value so that existing callers keep compiling.
	//
	// Deprecated: use SCardPowered instead.
	SCardPowever = SCardPowered

	// PCSCDSockName is the path of the unix socket dialed to reach the daemon.
	PCSCDSockName = "/run/pcscd/pcscd.comm"
)
||||
|
||||
// Identifiers of the commands that can be sent to the daemon. They are placed
// in the command field of the message header. The values are consecutive and
// start at 1; zero is not a valid command.
const (
	SCardEstablishContext               = iota + 1 // used by SCardEstablishContext()
	SCardReleaseContext                            // used by SCardReleaseContext()
	SCardListReaders                               // used by SCardListReaders()
	SCardConnect                                   // used by SCardConnect()
	SCardReConnect                                 // used by SCardReconnect()
	SCardDisConnect                                // used by SCardDisconnect()
	SCardBeginTransaction                          // used by SCardBeginTransaction()
	SCardEndTransaction                            // used by SCardEndTransaction()
	SCardTransmit                                  // used by SCardTransmit()
	SCardControl                                   // used by SCardControl()
	SCardStatus                                    // used by SCardStatus()
	SCardGetStatusChange                           // not used
	SCardCancel                                    // used by SCardCancel()
	SCardCancelTransaction                         // not used
	SCardGetAttrib                                 // used by SCardGetAttrib()
	SCardSetAttrib                                 // used by SCardSetAttrib()
	CommandVersion                                 // get the client/server protocol version
	CommandGetReaderState                          // get the readers state
	CommandWaitReaderStateChange                   // wait for a reader state change
	CommandStopWaitingReaderStateChange            // stop waiting for a reader state change
)
||||
|
||||
// Version of the client/daemon IPC protocol announced by this package during
// the version exchange.
const (
	// ProtocolVersionMinor is the IPC minor version.
	ProtocolVersionMinor = 3
	// ProtocolVersionMajor is the IPC major version.
	ProtocolVersionMajor = 4
)
@ -0,0 +1 @@ |
||||
module github.com/gballet/go-libpcsclite |
@ -0,0 +1,78 @@ |
||||
// BSD 3-Clause License
|
||||
//
|
||||
// Copyright (c) 2019, Guillaume Ballet
|
||||
// All rights reserved.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright notice, this
|
||||
// list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistributions in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * Neither the name of the copyright holder nor the names of its
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
package pcsc |
||||
|
||||
import ( |
||||
"encoding/binary" |
||||
"net" |
||||
) |
||||
|
||||
// messageSendWithHeader frames data with an 8-byte header and writes the whole
// message to conn with a single Write call.
//
// The header holds two little-endian 32-bit values: the payload length
// followed by the command identifier. The payload bytes follow unchanged.
//
// The error returned by conn.Write, if any, is returned as is.
func messageSendWithHeader(command uint32, conn net.Conn, data []byte) error {
	const headerLength = 8

	/* Header: payload size first, then the command */
	packet := make([]byte, headerLength, headerLength+len(data))
	binary.LittleEndian.PutUint32(packet[0:4], uint32(len(data)))
	binary.LittleEndian.PutUint32(packet[4:8], command)

	/* Payload */
	packet = append(packet, data...)

	if _, err := conn.Write(packet); err != nil {
		return err
	}
	return nil
}
||||
|
||||
// ClientSetupSession prepares a communication channel for the client to talk to the server.
|
||||
// This is called by the application to create a socket for local IPC with the
|
||||
// server. The socket is associated to the file \c PCSCLITE_CSOCK_NAME.
|
||||
/* |
||||
* @param[out] pdwClientID Client Connection ID. |
||||
* |
||||
* @retval 0 Success. |
||||
* @retval -1 Can not create the socket. |
||||
* @retval -1 The socket can not open a connection. |
||||
* @retval -1 Can not set the socket to non-blocking. |
||||
*/ |
||||
func clientSetupSession() (net.Conn, error) { |
||||
return net.Dial("unix", PCSCDSockName) |
||||
} |
@ -0,0 +1,389 @@ |
||||
// BSD 3-Clause License
|
||||
//
|
||||
// Copyright (c) 2019, Guillaume Ballet
|
||||
// All rights reserved.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright notice, this
|
||||
// list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistributions in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * Neither the name of the copyright holder nor the names of its
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
package pcsc |
||||
|
||||
import ( |
||||
"encoding/binary" |
||||
"fmt" |
||||
"net" |
||||
"sync" |
||||
"unsafe" |
||||
) |
||||
|
||||
// Client contains all the information needed to establish
|
||||
// and maintain a connection to the deamon/card.
|
||||
type Client struct { |
||||
conn net.Conn |
||||
|
||||
minor uint32 |
||||
major uint32 |
||||
|
||||
ctx uint32 |
||||
|
||||
mutex sync.Mutex |
||||
|
||||
readerStateDescriptors [MaxReaderStateDescriptors]ReaderState |
||||
} |
||||
|
||||
// EstablishContext asks the PCSC daemon to create a context
|
||||
// handle for further communication with connected cards and
|
||||
// readers.
|
||||
func EstablishContext(scope uint32) (*Client, error) { |
||||
client := &Client{} |
||||
|
||||
conn, err := clientSetupSession() |
||||
if err != nil { |
||||
return nil, err |
||||
} |
||||
client.conn = conn |
||||
|
||||
/* Exchange version information */ |
||||
payload := make([]byte, 12) |
||||
binary.LittleEndian.PutUint32(payload, ProtocolVersionMajor) |
||||
binary.LittleEndian.PutUint32(payload[4:], ProtocolVersionMinor) |
||||
binary.LittleEndian.PutUint32(payload[8:], SCardSuccess) |
||||
err = messageSendWithHeader(CommandVersion, conn, payload) |
||||
if err != nil { |
||||
return nil, err |
||||
} |
||||
response := make([]byte, 12) |
||||
n, err := conn.Read(response) |
||||
if err != nil { |
||||
return nil, err |
||||
} |
||||
if n != len(response) { |
||||
return nil, fmt.Errorf("invalid response length: expected %d, got %d", len(response), n) |
||||
} |
||||
code := binary.LittleEndian.Uint32(response[8:]) |
||||
if code != SCardSuccess { |
||||
return nil, fmt.Errorf("invalid response code: expected %d, got %d", SCardSuccess, code) |
||||
} |
||||
client.major = binary.LittleEndian.Uint32(response) |
||||
client.minor = binary.LittleEndian.Uint32(response[4:]) |
||||
if client.major != ProtocolVersionMajor || client.minor != ProtocolVersionMinor { |
||||
return nil, fmt.Errorf("invalid version found: expected %d.%d, got %d.%d", ProtocolVersionMajor, ProtocolVersionMinor, client.major, client.minor) |
||||
} |
||||
|
||||
/* Establish the context proper */ |
||||
binary.LittleEndian.PutUint32(payload, scope) |
||||
binary.LittleEndian.PutUint32(payload[4:], 0) |
||||
binary.LittleEndian.PutUint32(payload[8:], SCardSuccess) |
||||
err = messageSendWithHeader(SCardEstablishContext, conn, payload) |
||||
if err != nil { |
||||
return nil, err |
||||
} |
||||
response = make([]byte, 12) |
||||
n, err = conn.Read(response) |
||||
if err != nil { |
||||
return nil, err |
||||
} |
||||
if n != len(response) { |
||||
return nil, fmt.Errorf("invalid response length: expected %d, got %d", len(response), n) |
||||
} |
||||
code = binary.LittleEndian.Uint32(response[8:]) |
||||
if code != SCardSuccess { |
||||
return nil, fmt.Errorf("invalid response code: expected %d, got %d", SCardSuccess, code) |
||||
} |
||||
client.ctx = binary.LittleEndian.Uint32(response[4:]) |
||||
|
||||
return client, nil |
||||
} |
||||
|
||||
// ReleaseContext tells the daemon that the client will no longer
|
||||
// need the context.
|
||||
func (client *Client) ReleaseContext() error { |
||||
client.mutex.Lock() |
||||
defer client.mutex.Unlock() |
||||
|
||||
data := [8]byte{} |
||||
binary.LittleEndian.PutUint32(data[:], client.ctx) |
||||
binary.LittleEndian.PutUint32(data[4:], SCardSuccess) |
||||
err := messageSendWithHeader(SCardReleaseContext, client.conn, data[:]) |
||||
if err != nil { |
||||
return err |
||||
} |
||||
total := 0 |
||||
for total < len(data) { |
||||
n, err := client.conn.Read(data[total:]) |
||||
if err != nil { |
||||
return err |
||||
} |
||||
total += n |
||||
} |
||||
code := binary.LittleEndian.Uint32(data[4:]) |
||||
if code != SCardSuccess { |
||||
return fmt.Errorf("invalid return code: %x", code) |
||||
} |
||||
|
||||
return nil |
||||
} |
||||
|
||||
// Constants related to the reader state structure
const (
	// ReaderStateNameLength is the size in bytes of the fixed-width reader
	// name field.
	ReaderStateNameLength = 128
	// ReaderStateMaxAtrSizeLength is the size in bytes of the ATR field.
	ReaderStateMaxAtrSizeLength = 33
	// ReaderStateDescriptorLength is the size in bytes of one serialized
	// reader state descriptor.
	// NOTE: the ATR field is padded so that the next 32-bit field is aligned
	// in the C version, which means it actually occupies 36 bytes and not 33;
	// this is what the trailing "+ 3" accounts for.
	ReaderStateDescriptorLength = ReaderStateNameLength + ReaderStateMaxAtrSizeLength + 5*4 + 3

	// MaxReaderStateDescriptors is the number of descriptors the daemon
	// returns in one reader state answer.
	MaxReaderStateDescriptors = 16
)

// ReaderState represent the state of a single reader, as reported
// by the PCSC daemon.
type ReaderState struct {
	Name          string /* reader name */
	eventCounter  uint32 /* number of card events */
	readerState   uint32 /* SCARD_* bit field */
	readerSharing uint32 /* PCSCLITE_SHARING_* sharing status */

	cardAtr       [ReaderStateMaxAtrSizeLength]byte /* ATR */
	cardAtrLength uint32                            /* ATR length */
	cardProtocol  uint32                            /* SCARD_PROTOCOL_* value */
}

// readerStateWire mirrors the layout of a serialized descriptor: a fixed-size
// name array followed by the numeric fields. It exists only so that field
// offsets can be computed with unsafe.Offsetof. ReaderState itself cannot be
// used for that, because its Name is a Go string header and not a 128-byte
// array, which shifts every following field.
type readerStateWire struct {
	name          [ReaderStateNameLength]byte
	eventCounter  uint32
	readerState   uint32
	readerSharing uint32
	cardAtr       [ReaderStateMaxAtrSizeLength]byte
	cardAtrLength uint32
	cardProtocol  uint32
}

// Offsets of the descriptor fields inside the serialized form.
const (
	readerStateEventCounterOffset  = unsafe.Offsetof(readerStateWire{}.eventCounter)
	readerStateReaderStateOffset   = unsafe.Offsetof(readerStateWire{}.readerState)
	readerStateReaderSharingOffset = unsafe.Offsetof(readerStateWire{}.readerSharing)
	readerStateCardAtrOffset       = unsafe.Offsetof(readerStateWire{}.cardAtr)
	readerStateCardAtrLengthOffset = unsafe.Offsetof(readerStateWire{}.cardAtrLength)
	readerStateCardProtocolOffset  = unsafe.Offsetof(readerStateWire{}.cardProtocol)
)

// getReaderState decodes one serialized reader state descriptor from the start
// of data. Numeric fields are little-endian.
//
// Name is returned as the complete ReaderStateNameLength-byte field, including
// any trailing NUL padding, so it is never empty.
//
// An error is returned when data is shorter than ReaderStateDescriptorLength.
func getReaderState(data []byte) (ReaderState, error) {
	ret := ReaderState{}
	if len(data) < ReaderStateDescriptorLength {
		return ret, fmt.Errorf("could not unmarshall data of length %d < %d", len(data), ReaderStateDescriptorLength)
	}

	ret.Name = string(data[:ReaderStateNameLength])
	ret.eventCounter = binary.LittleEndian.Uint32(data[readerStateEventCounterOffset:])
	ret.readerState = binary.LittleEndian.Uint32(data[readerStateReaderStateOffset:])
	ret.readerSharing = binary.LittleEndian.Uint32(data[readerStateReaderSharingOffset:])
	copy(ret.cardAtr[:], data[readerStateCardAtrOffset:readerStateCardAtrOffset+ReaderStateMaxAtrSizeLength])
	ret.cardAtrLength = binary.LittleEndian.Uint32(data[readerStateCardAtrLengthOffset:])
	ret.cardProtocol = binary.LittleEndian.Uint32(data[readerStateCardProtocolOffset:])

	return ret, nil
}
||||
|
||||
// ListReaders gets the list of readers from the daemon
|
||||
func (client *Client) ListReaders() ([]string, error) { |
||||
client.mutex.Lock() |
||||
defer client.mutex.Unlock() |
||||
|
||||
err := messageSendWithHeader(CommandGetReaderState, client.conn, []byte{}) |
||||
if err != nil { |
||||
return nil, err |
||||
} |
||||
response := make([]byte, ReaderStateDescriptorLength*MaxReaderStateDescriptors) |
||||
total := 0 |
||||
for total < len(response) { |
||||
n, err := client.conn.Read(response[total:]) |
||||
if err != nil { |
||||
return nil, err |
||||
} |
||||
total += n |
||||
} |
||||
|
||||
var names []string |
||||
for i := range client.readerStateDescriptors { |
||||
desc, err := getReaderState(response[i*ReaderStateDescriptorLength:]) |
||||
if err != nil { |
||||
return nil, err |
||||
} |
||||
client.readerStateDescriptors[i] = desc |
||||
if desc.Name[0] == 0 { |
||||
break |
||||
} |
||||
names = append(names, desc.Name) |
||||
} |
||||
|
||||
return names, nil |
||||
} |
||||
|
||||
// Offsets into the Connect request/response packet
|
||||
const ( |
||||
SCardConnectReaderNameOffset = 4 |
||||
SCardConnectShareModeOffset = SCardConnectReaderNameOffset + ReaderStateNameLength |
||||
SCardConnectPreferredProtocolOffset = SCardConnectShareModeOffset + 4 |
||||
SCardConnectReturnValueOffset = SCardConnectPreferredProtocolOffset + 12 |
||||
) |
||||
|
||||
// Card represents the connection to a card
|
||||
type Card struct { |
||||
handle uint32 |
||||
activeProto uint32 |
||||
client *Client |
||||
} |
||||
|
||||
// Connect asks the daemon to connect to the card
|
||||
func (client *Client) Connect(name string, shareMode uint32, preferredProtocol uint32) (*Card, error) { |
||||
client.mutex.Lock() |
||||
defer client.mutex.Unlock() |
||||
|
||||
request := make([]byte, ReaderStateNameLength+4*6) |
||||
binary.LittleEndian.PutUint32(request, client.ctx) |
||||
copy(request[SCardConnectReaderNameOffset:], []byte(name)) |
||||
binary.LittleEndian.PutUint32(request[SCardConnectShareModeOffset:], shareMode) |
||||
binary.LittleEndian.PutUint32(request[SCardConnectPreferredProtocolOffset:], preferredProtocol) |
||||
binary.LittleEndian.PutUint32(request[SCardConnectReturnValueOffset:], SCardSuccess) |
||||
|
||||
err := messageSendWithHeader(SCardConnect, client.conn, request) |
||||
if err != nil { |
||||
return nil, err |
||||
} |
||||
response := make([]byte, ReaderStateNameLength+4*6) |
||||
total := 0 |
||||
for total < len(response) { |
||||
n, err := client.conn.Read(response[total:]) |
||||
if err != nil { |
||||
return nil, err |
||||
} |
||||
fmt.Println("total, n", total, n, response) |
||||
total += n |
||||
} |
||||
code := binary.LittleEndian.Uint32(response[148:]) |
||||
if code != SCardSuccess { |
||||
return nil, fmt.Errorf("invalid return code: %x", code) |
||||
} |
||||
handle := binary.LittleEndian.Uint32(response[140:]) |
||||
active := binary.LittleEndian.Uint32(response[SCardConnectPreferredProtocolOffset:]) |
||||
|
||||
return &Card{handle: handle, activeProto: active, client: client}, nil |
||||
} |
||||
|
||||
// transmit describes the descriptor contained in SCARD_TRANSMIT messages.
//
// These data are passed through the field sharedSegmentMsg.data. The fields
// are listed in the order in which Transmit serializes them, each as a
// little-endian 32-bit value.
type transmit struct {
	hCard             uint32 // card handle
	ioSendPciProtocol uint32 // send PCI: protocol
	ioSendPciLength   uint32 // send PCI: length
	cbSendLength      uint32 // number of payload bytes that follow
	ioRecvPciProtocol uint32 // receive PCI: protocol
	ioRecvPciLength   uint32 // receive PCI: length
	pcbRecvLength     uint32 // length of the received data
	rv                uint32 // status word
}
||||
|
||||
// SCardIoRequest contains the info needed for performing an IO request
type SCardIoRequest struct {
	// proto and length are filled by Transmit from the receive-PCI protocol
	// and length words of the daemon's answer.
	proto, length uint32
}
||||
|
||||
// TransmitRequestLength is the size in bytes of the transmit descriptor that
// Transmit sends and reads back: eight 32-bit words.
const TransmitRequestLength = 32
||||
|
||||
// Transmit sends request data to a card and returns the response
|
||||
func (card *Card) Transmit(adpu []byte) ([]byte, *SCardIoRequest, error) { |
||||
card.client.mutex.Lock() |
||||
defer card.client.mutex.Unlock() |
||||
|
||||
request := [TransmitRequestLength]byte{} |
||||
binary.LittleEndian.PutUint32(request[:], card.handle) |
||||
binary.LittleEndian.PutUint32(request[4:] /*card.activeProto*/, 2) |
||||
binary.LittleEndian.PutUint32(request[8:], 8) |
||||
binary.LittleEndian.PutUint32(request[12:], uint32(len(adpu))) |
||||
binary.LittleEndian.PutUint32(request[16:], 0) |
||||
binary.LittleEndian.PutUint32(request[20:], 0) |
||||
binary.LittleEndian.PutUint32(request[24:], 0x10000) |
||||
binary.LittleEndian.PutUint32(request[28:], SCardSuccess) |
||||
err := messageSendWithHeader(SCardTransmit, card.client.conn, request[:]) |
||||
if err != nil { |
||||
return nil, nil, err |
||||
} |
||||
// Add the ADPU payload after the transmit descriptor
|
||||
n, err := card.client.conn.Write(adpu) |
||||
if err != nil { |
||||
return nil, nil, err |
||||
} |
||||
if n != len(adpu) { |
||||
return nil, nil, fmt.Errorf("Invalid number of bytes written: expected %d, got %d", len(adpu), n) |
||||
} |
||||
response := [TransmitRequestLength]byte{} |
||||
total := 0 |
||||
for total < len(response) { |
||||
n, err = card.client.conn.Read(response[total:]) |
||||
if err != nil { |
||||
return nil, nil, err |
||||
} |
||||
total += n |
||||
} |
||||
|
||||
code := binary.LittleEndian.Uint32(response[28:]) |
||||
if code != SCardSuccess { |
||||
return nil, nil, fmt.Errorf("invalid return code: %x", code) |
||||
} |
||||
|
||||
// Recover the response data
|
||||
recvProto := binary.LittleEndian.Uint32(response[16:]) |
||||
recvLength := binary.LittleEndian.Uint32(response[20:]) |
||||
recv := &SCardIoRequest{proto: recvProto, length: recvLength} |
||||
recvLength = binary.LittleEndian.Uint32(response[24:]) |
||||
recvData := make([]byte, recvLength) |
||||
total = 0 |
||||
for uint32(total) < recvLength { |
||||
n, err := card.client.conn.Read(recvData[total:]) |
||||
if err != nil { |
||||
return nil, nil, err |
||||
} |
||||
total += n |
||||
} |
||||
|
||||
return recvData, recv, nil |
||||
} |
||||
|
||||
// Disconnect tells the PCSC daemon that the client is no longer
|
||||
// interested in communicating with the card.
|
||||
func (card *Card) Disconnect(disposition uint32) error { |
||||
card.client.mutex.Lock() |
||||
defer card.client.mutex.Unlock() |
||||
|
||||
data := [12]byte{} |
||||
binary.LittleEndian.PutUint32(data[:], card.handle) |
||||
binary.LittleEndian.PutUint32(data[4:], disposition) |
||||
binary.LittleEndian.PutUint32(data[8:], SCardSuccess) |
||||
err := messageSendWithHeader(SCardDisConnect, card.client.conn, data[:]) |
||||
if err != nil { |
||||
return err |
||||
} |
||||
total := 0 |
||||
for total < len(data) { |
||||
n, err := card.client.conn.Read(data[total:]) |
||||
if err != nil { |
||||
return err |
||||
} |
||||
total += n |
||||
} |
||||
code := binary.LittleEndian.Uint32(data[8:]) |
||||
if code != SCardSuccess { |
||||
return fmt.Errorf("invalid return code: %x", code) |
||||
} |
||||
|
||||
return nil |
||||
} |
@ -0,0 +1,356 @@ |
||||
Mozilla Public License Version 2.0 |
||||
================================== |
||||
|
||||
### 1. Definitions |
||||
|
||||
**1.1. “Contributor”** |
||||
means each individual or legal entity that creates, contributes to |
||||
the creation of, or owns Covered Software. |
||||
|
||||
**1.2. “Contributor Version”** |
||||
means the combination of the Contributions of others (if any) used |
||||
by a Contributor and that particular Contributor's Contribution. |
||||
|
||||
**1.3. “Contribution”** |
||||
means Covered Software of a particular Contributor. |
||||
|
||||
**1.4. “Covered Software”** |
||||
means Source Code Form to which the initial Contributor has attached |
||||
the notice in Exhibit A, the Executable Form of such Source Code |
||||
Form, and Modifications of such Source Code Form, in each case |
||||
including portions thereof. |
||||
|
||||
**1.5. “Incompatible With Secondary Licenses”** |
||||
means |
||||
|
||||
* **(a)** that the initial Contributor has attached the notice described |
||||
in Exhibit B to the Covered Software; or |
||||
* **(b)** that the Covered Software was made available under the terms of |
||||
version 1.1 or earlier of the License, but not also under the |
||||
terms of a Secondary License. |
||||
|
||||
**1.6. “Executable Form”** |
||||
means any form of the work other than Source Code Form. |
||||
|
||||
**1.7. “Larger Work”** |
||||
means a work that combines Covered Software with other material, in |
||||
a separate file or files, that is not Covered Software. |
||||
|
||||
**1.8. “License”** |
||||
means this document. |
||||
|
||||
**1.9. “Licensable”** |
||||
means having the right to grant, to the maximum extent possible, |
||||
whether at the time of the initial grant or subsequently, any and |
||||
all of the rights conveyed by this License. |
||||
|
||||
**1.10. “Modifications”** |
||||
means any of the following: |
||||
|
||||
* **(a)** any file in Source Code Form that results from an addition to, |
||||
deletion from, or modification of the contents of Covered |
||||
Software; or |
||||
* **(b)** any new file in Source Code Form that contains any Covered |
||||
Software. |
||||
|
||||
**1.11. “Patent Claims” of a Contributor** |
||||
means any patent claim(s), including without limitation, method, |
||||
process, and apparatus claims, in any patent Licensable by such |
||||
Contributor that would be infringed, but for the grant of the |
||||
License, by the making, using, selling, offering for sale, having |
||||
made, import, or transfer of either its Contributions or its |
||||
Contributor Version. |
||||
|
||||
**1.12. “Secondary License”** |
||||
means either the GNU General Public License, Version 2.0, the GNU |
||||
Lesser General Public License, Version 2.1, the GNU Affero General |
||||
Public License, Version 3.0, or any later versions of those |
||||
licenses. |
||||
|
||||
**1.13. “Source Code Form”** |
||||
means the form of the work preferred for making modifications. |
||||
|
||||
**1.14. “You” (or “Your”)** |
||||
means an individual or a legal entity exercising rights under this |
||||
License. For legal entities, “You” includes any entity that |
||||
controls, is controlled by, or is under common control with You. For |
||||
purposes of this definition, “control” means **(a)** the power, direct |
||||
or indirect, to cause the direction or management of such entity, |
||||
whether by contract or otherwise, or **(b)** ownership of more than |
||||
fifty percent (50%) of the outstanding shares or beneficial |
||||
ownership of such entity. |
||||
|
||||
|
||||
### 2. License Grants and Conditions |
||||
|
||||
#### 2.1. Grants |
||||
|
||||
Each Contributor hereby grants You a world-wide, royalty-free, |
||||
non-exclusive license: |
||||
|
||||
* **(a)** under intellectual property rights (other than patent or trademark) |
||||
Licensable by such Contributor to use, reproduce, make available, |
||||
modify, display, perform, distribute, and otherwise exploit its |
||||
Contributions, either on an unmodified basis, with Modifications, or |
||||
as part of a Larger Work; and |
||||
* **(b)** under Patent Claims of such Contributor to make, use, sell, offer |
||||
for sale, have made, import, and otherwise transfer either its |
||||
Contributions or its Contributor Version. |
||||
|
||||
#### 2.2. Effective Date |
||||
|
||||
The licenses granted in Section 2.1 with respect to any Contribution |
||||
become effective for each Contribution on the date the Contributor first |
||||
distributes such Contribution. |
||||
|
||||
#### 2.3. Limitations on Grant Scope |
||||
|
||||
The licenses granted in this Section 2 are the only rights granted under |
||||
this License. No additional rights or licenses will be implied from the |
||||
distribution or licensing of Covered Software under this License. |
||||
Notwithstanding Section 2.1(b) above, no patent license is granted by a |
||||
Contributor: |
||||
|
||||
* **(a)** for any code that a Contributor has removed from Covered Software; |
||||
or |
||||
* **(b)** for infringements caused by: **(i)** Your and any other third party's |
||||
modifications of Covered Software, or **(ii)** the combination of its |
||||
Contributions with other software (except as part of its Contributor |
||||
Version); or |
||||
* **(c)** under Patent Claims infringed by Covered Software in the absence of |
||||
its Contributions. |
||||
|
||||
This License does not grant any rights in the trademarks, service marks, |
||||
or logos of any Contributor (except as may be necessary to comply with |
||||
the notice requirements in Section 3.4). |
||||
|
||||
#### 2.4. Subsequent Licenses |
||||
|
||||
No Contributor makes additional grants as a result of Your choice to |
||||
distribute the Covered Software under a subsequent version of this |
||||
License (see Section 10.2) or under the terms of a Secondary License (if |
||||
permitted under the terms of Section 3.3). |
||||
|
||||
#### 2.5. Representation |
||||
|
||||
Each Contributor represents that the Contributor believes its |
||||
Contributions are its original creation(s) or it has sufficient rights |
||||
to grant the rights to its Contributions conveyed by this License. |
||||
|
||||
#### 2.6. Fair Use |
||||
|
||||
This License is not intended to limit any rights You have under |
||||
applicable copyright doctrines of fair use, fair dealing, or other |
||||
equivalents. |
||||
|
||||
#### 2.7. Conditions |
||||
|
||||
Sections 3.1, 3.2, 3.3, and 3.4 are conditions of the licenses granted |
||||
in Section 2.1. |
||||
|
||||
|
||||
### 3. Responsibilities |
||||
|
||||
#### 3.1. Distribution of Source Form |
||||
|
||||
All distribution of Covered Software in Source Code Form, including any |
||||
Modifications that You create or to which You contribute, must be under |
||||
the terms of this License. You must inform recipients that the Source |
||||
Code Form of the Covered Software is governed by the terms of this |
||||
License, and how they can obtain a copy of this License. You may not |
||||
attempt to alter or restrict the recipients' rights in the Source Code |
||||
Form. |
||||
|
||||
#### 3.2. Distribution of Executable Form |
||||
|
||||
If You distribute Covered Software in Executable Form then: |
||||
|
||||
* **(a)** such Covered Software must also be made available in Source Code |
||||
Form, as described in Section 3.1, and You must inform recipients of |
||||
the Executable Form how they can obtain a copy of such Source Code |
||||
Form by reasonable means in a timely manner, at a charge no more |
||||
than the cost of distribution to the recipient; and |
||||
|
||||
* **(b)** You may distribute such Executable Form under the terms of this |
||||
License, or sublicense it under different terms, provided that the |
||||
license for the Executable Form does not attempt to limit or alter |
||||
the recipients' rights in the Source Code Form under this License. |
||||
|
||||
#### 3.3. Distribution of a Larger Work |
||||
|
||||
You may create and distribute a Larger Work under terms of Your choice, |
||||
provided that You also comply with the requirements of this License for |
||||
the Covered Software. If the Larger Work is a combination of Covered |
||||
Software with a work governed by one or more Secondary Licenses, and the |
||||
Covered Software is not Incompatible With Secondary Licenses, this |
||||
License permits You to additionally distribute such Covered Software |
||||
under the terms of such Secondary License(s), so that the recipient of |
||||
the Larger Work may, at their option, further distribute the Covered |
||||
Software under the terms of either this License or such Secondary |
||||
License(s). |
||||
|
||||
#### 3.4. Notices |
||||
|
||||
You may not remove or alter the substance of any license notices |
||||
(including copyright notices, patent notices, disclaimers of warranty, |
||||
or limitations of liability) contained within the Source Code Form of |
||||
the Covered Software, except that You may alter any license notices to |
||||
the extent required to remedy known factual inaccuracies. |
||||
|
||||
#### 3.5. Application of Additional Terms |
||||
|
||||
You may choose to offer, and to charge a fee for, warranty, support, |
||||
indemnity or liability obligations to one or more recipients of Covered |
||||
Software. However, You may do so only on Your own behalf, and not on |
||||
behalf of any Contributor. You must make it absolutely clear that any |
||||
such warranty, support, indemnity, or liability obligation is offered by |
||||
You alone, and You hereby agree to indemnify every Contributor for any |
||||
liability incurred by such Contributor as a result of warranty, support, |
||||
indemnity or liability terms You offer. You may include additional |
||||
disclaimers of warranty and limitations of liability specific to any |
||||
jurisdiction. |
||||
|
||||
|
||||
### 4. Inability to Comply Due to Statute or Regulation |
||||
|
||||
If it is impossible for You to comply with any of the terms of this |
||||
License with respect to some or all of the Covered Software due to |
||||
statute, judicial order, or regulation then You must: **(a)** comply with |
||||
the terms of this License to the maximum extent possible; and **(b)** |
||||
describe the limitations and the code they affect. Such description must |
||||
be placed in a text file included with all distributions of the Covered |
||||
Software under this License. Except to the extent prohibited by statute |
||||
or regulation, such description must be sufficiently detailed for a |
||||
recipient of ordinary skill to be able to understand it. |
||||
|
||||
|
||||
### 5. Termination |
||||
|
||||
**5.1.** The rights granted under this License will terminate automatically |
||||
if You fail to comply with any of its terms. However, if You become |
||||
compliant, then the rights granted under this License from a particular |
||||
Contributor are reinstated **(a)** provisionally, unless and until such |
||||
Contributor explicitly and finally terminates Your grants, and **(b)** on an |
||||
ongoing basis, if such Contributor fails to notify You of the |
||||
non-compliance by some reasonable means prior to 60 days after You have |
||||
come back into compliance. Moreover, Your grants from a particular |
||||
Contributor are reinstated on an ongoing basis if such Contributor |
||||
notifies You of the non-compliance by some reasonable means, this is the |
||||
first time You have received notice of non-compliance with this License |
||||
from such Contributor, and You become compliant prior to 30 days after |
||||
Your receipt of the notice. |
||||
|
||||
**5.2.** If You initiate litigation against any entity by asserting a patent |
||||
infringement claim (excluding declaratory judgment actions, |
||||
counter-claims, and cross-claims) alleging that a Contributor Version |
||||
directly or indirectly infringes any patent, then the rights granted to |
||||
You by any and all Contributors for the Covered Software under Section |
||||
2.1 of this License shall terminate. |
||||
|
||||
**5.3.** In the event of termination under Sections 5.1 or 5.2 above, all |
||||
end user license agreements (excluding distributors and resellers) which |
||||
have been validly granted by You or Your distributors under this License |
||||
prior to termination shall survive termination. |
||||
|
||||
|
||||
### 6. Disclaimer of Warranty |
||||
|
||||
> Covered Software is provided under this License on an “as is” |
||||
> basis, without warranty of any kind, either expressed, implied, or |
||||
> statutory, including, without limitation, warranties that the |
||||
> Covered Software is free of defects, merchantable, fit for a |
||||
> particular purpose or non-infringing. The entire risk as to the |
||||
> quality and performance of the Covered Software is with You. |
||||
> Should any Covered Software prove defective in any respect, You |
||||
> (not any Contributor) assume the cost of any necessary servicing, |
||||
> repair, or correction. This disclaimer of warranty constitutes an |
||||
> essential part of this License. No use of any Covered Software is |
||||
> authorized under this License except under this disclaimer. |
||||
|
||||
### 7. Limitation of Liability |
||||
|
||||
> Under no circumstances and under no legal theory, whether tort |
||||
> (including negligence), contract, or otherwise, shall any |
||||
> Contributor, or anyone who distributes Covered Software as |
||||
> permitted above, be liable to You for any direct, indirect, |
||||
> special, incidental, or consequential damages of any character |
||||
> including, without limitation, damages for lost profits, loss of |
||||
> goodwill, work stoppage, computer failure or malfunction, or any |
||||
> and all other commercial damages or losses, even if such party |
||||
> shall have been informed of the possibility of such damages. This |
||||
> limitation of liability shall not apply to liability for death or |
||||
> personal injury resulting from such party's negligence to the |
||||
> extent applicable law prohibits such limitation. Some |
||||
> jurisdictions do not allow the exclusion or limitation of |
||||
> incidental or consequential damages, so this exclusion and |
||||
> limitation may not apply to You. |
||||
|
||||
|
||||
### 8. Litigation |
||||
|
||||
Any litigation relating to this License may be brought only in the |
||||
courts of a jurisdiction where the defendant maintains its principal |
||||
place of business and such litigation shall be governed by laws of that |
||||
jurisdiction, without reference to its conflict-of-law provisions. |
||||
Nothing in this Section shall prevent a party's ability to bring |
||||
cross-claims or counter-claims. |
||||
|
||||
|
||||
### 9. Miscellaneous |
||||
|
||||
This License represents the complete agreement concerning the subject |
||||
matter hereof. If any provision of this License is held to be |
||||
unenforceable, such provision shall be reformed only to the extent |
||||
necessary to make it enforceable. Any law or regulation which provides |
||||
that the language of a contract shall be construed against the drafter |
||||
shall not be used to construe this License against a Contributor. |
||||
|
||||
|
||||
### 10. Versions of the License |
||||
|
||||
#### 10.1. New Versions |
||||
|
||||
Mozilla Foundation is the license steward. Except as provided in Section |
||||
10.3, no one other than the license steward has the right to modify or |
||||
publish new versions of this License. Each version will be given a |
||||
distinguishing version number. |
||||
|
||||
#### 10.2. Effect of New Versions |
||||
|
||||
You may distribute the Covered Software under the terms of the version |
||||
of the License under which You originally received the Covered Software, |
||||
or under the terms of any subsequent version published by the license |
||||
steward. |
||||
|
||||
#### 10.3. Modified Versions |
||||
|
||||
If you create software not governed by this License, and you want to |
||||
create a new license for such software, you may create and use a |
||||
modified version of this License if you rename the license and remove |
||||
any references to the name of the license steward (except to note that |
||||
such modified license differs from this License). |
||||
|
||||
#### 10.4. Distributing Source Code Form that is Incompatible With Secondary Licenses |
||||
|
||||
If You choose to distribute Source Code Form that is Incompatible With |
||||
Secondary Licenses under the terms of this version of the License, the |
||||
notice described in Exhibit B of this License must be attached. |
||||
|
||||
## Exhibit A - Source Code Form License Notice |
||||
|
||||
This Source Code Form is subject to the terms of the Mozilla Public |
||||
License, v. 2.0. If a copy of the MPL was not distributed with this |
||||
file, You can obtain one at http://mozilla.org/MPL/2.0/. |
||||
|
||||
If it is not possible or desirable to put the notice in a particular |
||||
file, then You may include the notice in a location (such as a LICENSE |
||||
file in a relevant directory) where a recipient would be likely to look |
||||
for such a notice. |
||||
|
||||
You may add additional accurate notices of copyright ownership. |
||||
|
||||
## Exhibit B - “Incompatible With Secondary Licenses” Notice |
||||
|
||||
This Source Code Form is "Incompatible With Secondary Licenses", as |
||||
defined by the Mozilla Public License, v. 2.0. |
||||
|
@ -0,0 +1,214 @@ |
||||
package derivationpath |
||||
|
||||
import ( |
||||
"fmt" |
||||
"io" |
||||
"strconv" |
||||
"strings" |
||||
) |
||||
|
||||
// StartingPoint identifies the node that a decoded derivation path is
// relative to.
type StartingPoint int

const (
	tokenMaster    = 0x6D // char m
	tokenSeparator = 0x2F // char /
	tokenHardened  = 0x27 // char '
	tokenDot       = 0x2E // char .

	hardenedStart = 0x80000000 // 2^31
)

const (
	// StartingPointMaster is reported for paths that begin with "m".
	StartingPointMaster StartingPoint = iota + 1
	// StartingPointCurrent is reported for paths that begin with "." or
	// directly with a segment. It is also the value used for an empty path.
	StartingPointCurrent
	// StartingPointParent is reported for paths that begin with "..".
	StartingPointParent
)

// parseFunc is one state of the decoder. Each call consumes input and may
// replace decoder.f with the state that has to run next.
type parseFunc = func() error

// decoder is a small state machine that turns a textual derivation path into
// a starting point and a list of uint32 indexes.
type decoder struct {
	r                    *strings.Reader // input being decoded
	f                    parseFunc       // state to run on the next step
	pos                  int             // number of bytes consumed, used in error messages
	path                 []uint32        // segments decoded so far
	start                StartingPoint   // starting point detected by parseStart
	currentToken         string          // decimal digits of the segment being read
	currentTokenHardened bool            // whether the current segment was followed by '
}

// newDecoder returns a decoder positioned at the beginning of path.
func newDecoder(path string) *decoder {
	d := &decoder{
		r: strings.NewReader(path),
	}

	d.reset()

	return d
}

// reset rewinds the input and restores the initial parsing state.
func (d *decoder) reset() {
	// Seeking a strings.Reader to offset 0 from the start cannot fail, so the
	// result is discarded explicitly.
	_, _ = d.r.Seek(0, io.SeekStart)
	d.pos = 0
	d.start = StartingPointCurrent
	d.f = d.parseStart
	d.path = make([]uint32, 0)
	d.resetCurrentToken()
}

// resetCurrentToken clears the pending segment.
func (d *decoder) resetCurrentToken() {
	d.currentToken = ""
	d.currentTokenHardened = false
}

// parse runs the state machine until a state returns an error. io.EOF marks
// a clean end of input and is reported as a nil error; any other error is
// prefixed with the current position. The starting point and the segments
// decoded so far are returned in both cases.
func (d *decoder) parse() (StartingPoint, []uint32, error) {
	for {
		err := d.f()
		if err == nil {
			continue
		}

		if err == io.EOF {
			return d.start, d.path, nil
		}

		return d.start, d.path, fmt.Errorf("at position %d, %s", d.pos, err.Error())
	}
}

// readByte reads the next input byte and advances pos on success.
func (d *decoder) readByte() (byte, error) {
	b, err := d.r.ReadByte()
	if err != nil {
		return b, err
	}

	d.pos++

	return b, nil
}

// unreadByte pushes the last byte back and moves pos back on success.
func (d *decoder) unreadByte() error {
	err := d.r.UnreadByte()
	if err != nil {
		return err
	}

	d.pos--

	return nil
}

// parseStart detects the starting point ("m", "..", "." or none) and selects
// the next state accordingly.
func (d *decoder) parseStart() error {
	b, err := d.readByte()
	if err != nil {
		return err
	}

	if b == tokenMaster {
		d.start = StartingPointMaster
		d.f = d.parseSeparator
		return nil
	}

	if b == tokenDot {
		b2, err := d.readByte()
		if err != nil {
			return err
		}

		if b2 == tokenDot {
			d.f = d.parseSeparator
			d.start = StartingPointParent
			return nil
		}

		// A single dot: the byte after it belongs to the separator state.
		d.f = d.parseSeparator
		d.start = StartingPointCurrent
		return d.unreadByte()
	}

	// No prefix: the byte just read is the first byte of a segment.
	d.f = d.parseSegment

	return d.unreadByte()
}

// saveSegment appends the pending segment, if any, to the path, adding
// hardenedStart when it was marked hardened, then switches to parseSegment
// and clears the pending token. It fails when the token does not fit in 32
// bits or is not lower than 2^31.
func (d *decoder) saveSegment() error {
	if len(d.currentToken) > 0 {
		i, err := strconv.ParseUint(d.currentToken, 10, 32)
		if err != nil {
			return err
		}

		if i >= hardenedStart {
			// Move the reported position back towards the start of the token.
			d.pos -= len(d.currentToken) - 1
			return fmt.Errorf("index must be lower than 2^31, got %d", i)
		}

		if d.currentTokenHardened {
			i += hardenedStart
		}

		d.path = append(d.path, uint32(i))
	}

	d.f = d.parseSegment
	d.resetCurrentToken()

	return nil
}

// parseSeparator expects a '/' (or the end of input) and stores the pending
// segment when it finds one.
func (d *decoder) parseSeparator() error {
	b, err := d.readByte()
	if err == io.EOF {
		// This state is entered after a hardened marker with the segment still
		// pending. Flush it before reporting EOF, otherwise a path ending in a
		// hardened index (e.g. "m/44'") would lose its last segment.
		if saveErr := d.saveSegment(); saveErr != nil {
			return saveErr
		}

		return err
	}

	if err != nil {
		return err
	}

	if b == tokenSeparator {
		return d.saveSegment()
	}

	return fmt.Errorf("expected %s, got %s", string(rune(tokenSeparator)), string(b))
}

// parseSegment consumes one byte of a numeric segment. A '/' or the end of
// input stores the segment; a ' marks it hardened and hands over to
// parseSeparator; anything that is not a decimal digit is an error.
func (d *decoder) parseSegment() error {
	b, err := d.readByte()
	if err == io.EOF {
		if len(d.currentToken) == 0 {
			return fmt.Errorf("expected number, got EOF")
		}

		if newErr := d.saveSegment(); newErr != nil {
			return newErr
		}

		return err
	}

	if err != nil {
		return err
	}

	if len(d.currentToken) > 0 && b == tokenSeparator {
		return d.saveSegment()
	}

	if len(d.currentToken) > 0 && b == tokenHardened {
		d.currentTokenHardened = true
		d.f = d.parseSeparator
		return nil
	}

	if b < 0x30 || b > 0x39 {
		return fmt.Errorf("expected number, got %s", string(b))
	}

	d.currentToken += string(b)

	return nil
}

// Decode parses a derivation path such as "m/44'/60'/0'/0/0", "../1" or
// "./2'" and returns its starting point together with the segment indexes.
// Hardened segments (suffixed with ') have 2^31 added to their index.
//
// On a parse error the returned error is prefixed with the byte position;
// the starting point and the segments decoded before the error are still
// returned alongside it.
func Decode(str string) (StartingPoint, []uint32, error) {
	d := newDecoder(str)
	return d.parse()
}
@ -0,0 +1,36 @@ |
||||
package derivationpath |
||||
|
||||
import ( |
||||
"bytes" |
||||
"encoding/binary" |
||||
"fmt" |
||||
"strings" |
||||
) |
||||
|
||||
func Encode(rawPath []uint32) string { |
||||
segments := []string{string(tokenMaster)} |
||||
|
||||
for _, i := range rawPath { |
||||
suffix := "" |
||||
|
||||
if i >= hardenedStart { |
||||
i = i - hardenedStart |
||||
suffix = string(tokenHardened) |
||||
} |
||||
|
||||
segments = append(segments, fmt.Sprintf("%d%s", i, suffix)) |
||||
} |
||||
|
||||
return strings.Join(segments, string(tokenSeparator)) |
||||
} |
||||
|
||||
func EncodeFromBytes(data []byte) (string, error) { |
||||
buf := bytes.NewBuffer(data) |
||||
rawPath := make([]uint32, buf.Len()/4) |
||||
err := binary.Read(buf, binary.BigEndian, &rawPath) |
||||
if err != nil { |
||||
return "", err |
||||
} |
||||
|
||||
return Encode(rawPath), nil |
||||
} |
@ -0,0 +1,15 @@ |
||||
# This file is autogenerated, do not edit; changes may be undone by the next 'dep ensure'. |
||||
|
||||
|
||||
[[projects]] |
||||
branch = "master" |
||||
name = "golang.org/x/crypto" |
||||
packages = ["pbkdf2"] |
||||
revision = "a49355c7e3f8fe157a85be2f77e6e269a0f89602" |
||||
|
||||
[solve-meta] |
||||
analyzer-name = "dep" |
||||
analyzer-version = 1 |
||||
inputs-digest = "d7f1a7207c39125afcb9ca2365832cb83458edfc17f2f7e8d28fd56f19436856" |
||||
solver-name = "gps-cdcl" |
||||
solver-version = 1 |
@ -0,0 +1,26 @@ |
||||
|
||||
# Gopkg.toml example |
||||
# |
||||
# Refer to https://github.com/golang/dep/blob/master/docs/Gopkg.toml.md |
||||
# for detailed Gopkg.toml documentation. |
||||
# |
||||
# required = ["github.com/user/thing/cmd/thing"] |
||||
# ignored = ["github.com/user/project/pkgX", "bitbucket.org/user/project/pkgA/pkgY"] |
||||
# |
||||
# [[constraint]] |
||||
# name = "github.com/user/project" |
||||
# version = "1.0.0" |
||||
# |
||||
# [[constraint]] |
||||
# name = "github.com/user/project2" |
||||
# branch = "dev" |
||||
# source = "github.com/myfork/project2" |
||||
# |
||||
# [[override]] |
||||
# name = "github.com/x/y" |
||||
# version = "2.4.0" |
||||
|
||||
|
||||
[[constraint]] |
||||
branch = "master" |
||||
name = "golang.org/x/crypto" |
@ -0,0 +1,21 @@ |
||||
The MIT License (MIT) |
||||
|
||||
Copyright (c) 2014-2018 Tyler Smith and contributors |
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy |
||||
of this software and associated documentation files (the "Software"), to deal |
||||
in the Software without restriction, including without limitation the rights |
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
||||
copies of the Software, and to permit persons to whom the Software is |
||||
furnished to do so, subject to the following conditions: |
||||
|
||||
The above copyright notice and this permission notice shall be included in all |
||||
copies or substantial portions of the Software. |
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
||||
SOFTWARE. |
@ -0,0 +1,11 @@ |
||||
.DEFAULT_GOAL := help
|
||||
|
||||
tests: ## Run tests with coverage
|
||||
go test -v -cover ./...
|
||||
|
||||
profile_tests: ## Run tests and output coverage profiling
|
||||
go test -v -coverprofile=coverage.out .
|
||||
go tool cover -html=coverage.out
|
||||
|
||||
help: |
||||
@grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-30s\033[0m %s\n", $$1, $$2}'
|
@ -0,0 +1,45 @@ |
||||
# go-bip39 |
||||
[![Build Status](https://travis-ci.org/tyler-smith/go-bip39.svg?branch=master)](https://travis-ci.org/tyler-smith/go-bip39) |
||||
[![license](https://img.shields.io/github/license/tyler-smith/go-bip39.svg?maxAge=2592000)](https://github.com/tyler-smith/go-bip39/blob/master/LICENSE) |
||||
[![Documentation](https://godoc.org/github.com/tyler-smith/go-bip39?status.svg)](http://godoc.org/github.com/tyler-smith/go-bip39) |
||||
[![Go Report Card](https://goreportcard.com/badge/github.com/tyler-smith/go-bip39)](https://goreportcard.com/report/github.com/tyler-smith/go-bip39) |
||||
[![GitHub issues](https://img.shields.io/github/issues/tyler-smith/go-bip39.svg)](https://github.com/tyler-smith/go-bip39/issues) |
||||
|
||||
|
||||
A golang implementation of the BIP0039 spec for mnemonic seeds |
||||
|
||||
## Example |
||||
|
||||
```go |
||||
package main |
||||
|
||||
import ( |
||||
"github.com/tyler-smith/go-bip39" |
||||
"github.com/tyler-smith/go-bip32" |
||||
"fmt" |
||||
) |
||||
|
||||
func main(){ |
||||
// Generate a mnemonic for memorization or user-friendly seeds |
||||
entropy, _ := bip39.NewEntropy(256) |
||||
mnemonic, _ := bip39.NewMnemonic(entropy) |
||||
|
||||
// Generate a Bip32 HD wallet for the mnemonic and a user supplied password |
||||
seed := bip39.NewSeed(mnemonic, "Secret Passphrase") |
||||
|
||||
masterKey, _ := bip32.NewMasterKey(seed) |
||||
publicKey := masterKey.PublicKey() |
||||
|
||||
// Display mnemonic and keys |
||||
fmt.Println("Mnemonic: ", mnemonic) |
||||
fmt.Println("Master private key: ", masterKey) |
||||
fmt.Println("Master public key: ", publicKey) |
||||
} |
||||
``` |
||||
|
||||
## Credits |
||||
|
||||
Wordlists are from the [bip39 spec](https://github.com/bitcoin/bips/tree/master/bip-0039). |
||||
|
||||
Test vectors are from the standard Python BIP0039 implementation from the |
||||
Trezor team: [https://github.com/trezor/python-mnemonic](https://github.com/trezor/python-mnemonic) |
@ -0,0 +1,377 @@ |
||||
// Package bip39 is the Golang implementation of the BIP39 spec.
|
||||
//
|
||||
// The official BIP39 spec can be found at
|
||||
// https://github.com/bitcoin/bips/blob/master/bip-0039.mediawiki
|
||||
package bip39 |
||||
|
||||
import ( |
||||
"crypto/rand" |
||||
"crypto/sha256" |
||||
"crypto/sha512" |
||||
"encoding/binary" |
||||
"errors" |
||||
"fmt" |
||||
"math/big" |
||||
"strings" |
||||
|
||||
"github.com/tyler-smith/go-bip39/wordlists" |
||||
"golang.org/x/crypto/pbkdf2" |
||||
) |
||||
|
||||
var (
	// Operands for 11-bit arithmetic on big.Ints.
	last11BitsMask  = big.NewInt(1<<11 - 1)
	shift11BitsMask = big.NewInt(1 << 11)
	bigOne          = big.NewInt(1)
	bigTwo          = big.NewInt(2)

	// wordLengthChecksumMasksMapping maps a mnemonic word count to the mask
	// that isolates the checksum bits from the entropy+checksum value.
	wordLengthChecksumMasksMapping = map[int]*big.Int{
		12: big.NewInt(1<<4 - 1),
		15: big.NewInt(1<<5 - 1),
		18: big.NewInt(1<<6 - 1),
		21: big.NewInt(1<<7 - 1),
		24: big.NewInt(1<<8 - 1),
	}

	// wordLengthChecksumShiftMapping maps a mnemonic word count to the divisor
	// that keeps only the needed high bits of the 8-bit checksum byte.
	// 24 words (256 bits of entropy) use all 8 bits, so there is deliberately
	// no entry for it: dividing by a missing entry would crash.
	wordLengthChecksumShiftMapping = map[int]*big.Int{
		12: big.NewInt(1 << 4),
		15: big.NewInt(1 << 3),
		18: big.NewInt(1 << 2),
		21: big.NewInt(1 << 1),
	}

	// wordList is the set of words to use.
	wordList []string

	// wordMap is a reverse lookup map for wordList.
	wordMap map[string]int
)

var (
	// ErrInvalidMnemonic is returned when trying to use a malformed mnemonic.
	ErrInvalidMnemonic = errors.New("Invalid mnenomic")

	// ErrEntropyLengthInvalid is returned when trying to use an entropy set
	// with an invalid size.
	ErrEntropyLengthInvalid = errors.New("Entropy length must be [128, 256] and a multiple of 32")

	// ErrValidatedSeedLengthMismatch is returned when a validated seed is not
	// the same size as the given seed. This should never happen; it is
	// present only as a sanity assertion.
	ErrValidatedSeedLengthMismatch = errors.New("Seed length does not match validated seed length")

	// ErrChecksumIncorrect is returned when entropy has the incorrect checksum.
	ErrChecksumIncorrect = errors.New("Checksum incorrect")
)
||||
|
||||
// init installs wordlists.English as the package-wide word list by passing
// it to SetWordList.
func init() {
	SetWordList(wordlists.English)
}
||||
|
||||
// SetWordList sets the list of words to use for mnemonics. Currently the list
|
||||
// that is set is used package-wide.
|
||||
func SetWordList(list []string) { |
||||
wordList = list |
||||
wordMap = map[string]int{} |
||||
for i, v := range wordList { |
||||
wordMap[v] = i |
||||
} |
||||
} |
||||
|
||||
// GetWordList gets the list of words to use for mnemonics.
// The package-level slice itself is returned, not a copy.
func GetWordList() []string {
	return wordList
}
||||
|
||||
// GetWordIndex gets word index in wordMap.
|
||||
func GetWordIndex(word string) (int, bool) { |
||||
idx, ok := wordMap[word] |
||||
return idx, ok |
||||
} |
||||
|
||||
// NewEntropy will create random entropy bytes
|
||||
// so long as the requested size bitSize is an appropriate size.
|
||||
//
|
||||
// bitSize has to be a multiple 32 and be within the inclusive range of {128, 256}
|
||||
func NewEntropy(bitSize int) ([]byte, error) { |
||||
err := validateEntropyBitSize(bitSize) |
||||
if err != nil { |
||||
return nil, err |
||||
} |
||||
|
||||
entropy := make([]byte, bitSize/8) |
||||
_, err = rand.Read(entropy) |
||||
return entropy, err |
||||
} |
||||
|
||||
// EntropyFromMnemonic takes a mnemonic generated by this library,
|
||||
// and returns the input entropy used to generate the given mnemonic.
|
||||
// An error is returned if the given mnemonic is invalid.
|
||||
func EntropyFromMnemonic(mnemonic string) ([]byte, error) { |
||||
mnemonicSlice, isValid := splitMnemonicWords(mnemonic) |
||||
if !isValid { |
||||
return nil, ErrInvalidMnemonic |
||||
} |
||||
|
||||
// Decode the words into a big.Int.
|
||||
b := big.NewInt(0) |
||||
for _, v := range mnemonicSlice { |
||||
index, found := wordMap[v] |
||||
if found == false { |
||||
return nil, fmt.Errorf("word `%v` not found in reverse map", v) |
||||
} |
||||
var wordBytes [2]byte |
||||
binary.BigEndian.PutUint16(wordBytes[:], uint16(index)) |
||||
b = b.Mul(b, shift11BitsMask) |
||||
b = b.Or(b, big.NewInt(0).SetBytes(wordBytes[:])) |
||||
} |
||||
|
||||
// Build and add the checksum to the big.Int.
|
||||
checksum := big.NewInt(0) |
||||
checksumMask := wordLengthChecksumMasksMapping[len(mnemonicSlice)] |
||||
checksum = checksum.And(b, checksumMask) |
||||
|
||||
b.Div(b, big.NewInt(0).Add(checksumMask, bigOne)) |
||||
|
||||
// The entropy is the underlying bytes of the big.Int. Any upper bytes of
|
||||
// all 0's are not returned so we pad the beginning of the slice with empty
|
||||
// bytes if necessary.
|
||||
entropy := b.Bytes() |
||||
entropy = padByteSlice(entropy, len(mnemonicSlice)/3*4) |
||||
|
||||
// Generate the checksum and compare with the one we got from the mneomnic.
|
||||
entropyChecksumBytes := computeChecksum(entropy) |
||||
entropyChecksum := big.NewInt(int64(entropyChecksumBytes[0])) |
||||
if l := len(mnemonicSlice); l != 24 { |
||||
checksumShift := wordLengthChecksumShiftMapping[l] |
||||
entropyChecksum.Div(entropyChecksum, checksumShift) |
||||
} |
||||
|
||||
if checksum.Cmp(entropyChecksum) != 0 { |
||||
return nil, ErrChecksumIncorrect |
||||
} |
||||
|
||||
return entropy, nil |
||||
} |
||||
|
||||
// NewMnemonic will return a string consisting of the mnemonic words for
|
||||
// the given entropy.
|
||||
// If the provide entropy is invalid, an error will be returned.
|
||||
func NewMnemonic(entropy []byte) (string, error) { |
||||
// Compute some lengths for convenience.
|
||||
entropyBitLength := len(entropy) * 8 |
||||
checksumBitLength := entropyBitLength / 32 |
||||
sentenceLength := (entropyBitLength + checksumBitLength) / 11 |
||||
|
||||
// Validate that the requested size is supported.
|
||||
err := validateEntropyBitSize(entropyBitLength) |
||||
if err != nil { |
||||
return "", err |
||||
} |
||||
|
||||
// Add checksum to entropy.
|
||||
entropy = addChecksum(entropy) |
||||
|
||||
// Break entropy up into sentenceLength chunks of 11 bits.
|
||||
// For each word AND mask the rightmost 11 bits and find the word at that index.
|
||||
// Then bitshift entropy 11 bits right and repeat.
|
||||
// Add to the last empty slot so we can work with LSBs instead of MSB.
|
||||
|
||||
// Entropy as an int so we can bitmask without worrying about bytes slices.
|
||||
entropyInt := new(big.Int).SetBytes(entropy) |
||||
|
||||
// Slice to hold words in.
|
||||
words := make([]string, sentenceLength) |
||||
|
||||
// Throw away big.Int for AND masking.
|
||||
word := big.NewInt(0) |
||||
|
||||
for i := sentenceLength - 1; i >= 0; i-- { |
||||
// Get 11 right most bits and bitshift 11 to the right for next time.
|
||||
word.And(entropyInt, last11BitsMask) |
||||
entropyInt.Div(entropyInt, shift11BitsMask) |
||||
|
||||
// Get the bytes representing the 11 bits as a 2 byte slice.
|
||||
wordBytes := padByteSlice(word.Bytes(), 2) |
||||
|
||||
// Convert bytes to an index and add that word to the list.
|
||||
words[i] = wordList[binary.BigEndian.Uint16(wordBytes)] |
||||
} |
||||
|
||||
return strings.Join(words, " "), nil |
||||
} |
||||
|
||||
// MnemonicToByteArray takes a mnemonic string and turns it into a byte array
|
||||
// suitable for creating another mnemonic.
|
||||
// An error is returned if the mnemonic is invalid.
|
||||
func MnemonicToByteArray(mnemonic string, raw ...bool) ([]byte, error) { |
||||
var ( |
||||
mnemonicSlice = strings.Split(mnemonic, " ") |
||||
entropyBitSize = len(mnemonicSlice) * 11 |
||||
checksumBitSize = entropyBitSize % 32 |
||||
fullByteSize = (entropyBitSize-checksumBitSize)/8 + 1 |
||||
checksumByteSize = fullByteSize - (fullByteSize % 4) |
||||
) |
||||
|
||||
// Pre validate that the mnemonic is well formed and only contains words that
|
||||
// are present in the word list.
|
||||
if !IsMnemonicValid(mnemonic) { |
||||
return nil, ErrInvalidMnemonic |
||||
} |
||||
|
||||
// Convert word indices to a big.Int representing the entropy.
|
||||
checksummedEntropy := big.NewInt(0) |
||||
modulo := big.NewInt(2048) |
||||
for _, v := range mnemonicSlice { |
||||
index := big.NewInt(int64(wordMap[v])) |
||||
checksummedEntropy.Mul(checksummedEntropy, modulo) |
||||
checksummedEntropy.Add(checksummedEntropy, index) |
||||
} |
||||
|
||||
// Calculate the unchecksummed entropy so we can validate that the checksum is
|
||||
// correct.
|
||||
checksumModulo := big.NewInt(0).Exp(bigTwo, big.NewInt(int64(checksumBitSize)), nil) |
||||
rawEntropy := big.NewInt(0).Div(checksummedEntropy, checksumModulo) |
||||
|
||||
// Convert big.Ints to byte padded byte slices.
|
||||
rawEntropyBytes := padByteSlice(rawEntropy.Bytes(), checksumByteSize) |
||||
checksummedEntropyBytes := padByteSlice(checksummedEntropy.Bytes(), fullByteSize) |
||||
|
||||
// Validate that the checksum is correct.
|
||||
newChecksummedEntropyBytes := padByteSlice(addChecksum(rawEntropyBytes), fullByteSize) |
||||
if !compareByteSlices(checksummedEntropyBytes, newChecksummedEntropyBytes) { |
||||
return nil, ErrChecksumIncorrect |
||||
} |
||||
|
||||
if len(raw) > 0 && raw[0] { |
||||
return rawEntropyBytes, nil |
||||
} |
||||
|
||||
return checksummedEntropyBytes, nil |
||||
} |
||||
|
||||
// NewSeedWithErrorChecking creates a hashed seed output given the mnemonic string and a password.
|
||||
// An error is returned if the mnemonic is not convertible to a byte array.
|
||||
func NewSeedWithErrorChecking(mnemonic string, password string) ([]byte, error) { |
||||
_, err := MnemonicToByteArray(mnemonic) |
||||
if err != nil { |
||||
return nil, err |
||||
} |
||||
return NewSeed(mnemonic, password), nil |
||||
} |
||||
|
||||
// NewSeed creates a hashed seed output given a provided string and password.
|
||||
// No checking is performed to validate that the string provided is a valid mnemonic.
|
||||
func NewSeed(mnemonic string, password string) []byte { |
||||
return pbkdf2.Key([]byte(mnemonic), []byte("mnemonic"+password), 2048, 64, sha512.New) |
||||
} |
||||
|
||||
// IsMnemonicValid attempts to verify that the provided mnemonic is valid.
|
||||
// Validity is determined by both the number of words being appropriate,
|
||||
// and that all the words in the mnemonic are present in the word list.
|
||||
func IsMnemonicValid(mnemonic string) bool { |
||||
// Create a list of all the words in the mnemonic sentence
|
||||
words := strings.Fields(mnemonic) |
||||
|
||||
// Get word count
|
||||
wordCount := len(words) |
||||
|
||||
// The number of words should be 12, 15, 18, 21 or 24
|
||||
if wordCount%3 != 0 || wordCount < 12 || wordCount > 24 { |
||||
return false |
||||
} |
||||
|
||||
// Check if all words belong in the wordlist
|
||||
for _, word := range words { |
||||
if _, ok := wordMap[word]; !ok { |
||||
return false |
||||
} |
||||
} |
||||
|
||||
return true |
||||
} |
||||
|
||||
// addChecksum appends the first len(data)/4 bits of sha256(data) to data
// (one checksum bit per 32 bits of input) and returns the result as the
// big-endian bytes of the combined integer; leading zero bytes are therefore
// not present in the output.
// Only the first digest byte is consulted, so at most 8 checksum bits are
// meaningful (data up to 32 bytes).
func addChecksum(data []byte) []byte {
	digest := sha256.Sum256(data)
	lead := digest[0]

	// len() is in bytes, hence the division by 4 rather than 32.
	nBits := uint(len(data) / 4)

	acc := new(big.Int).SetBytes(data)
	for bit := uint(0); bit < nBits; bit++ {
		// Make room for one more bit on the right ...
		acc.Lsh(acc, 1)

		// ... and copy in the next checksum bit, taken from the most
		// significant end of the digest byte.
		if lead&(1<<(7-bit)) != 0 {
			acc.SetBit(acc, 0, 1)
		}
	}

	return acc.Bytes()
}
||||
|
||||
// computeChecksum returns the 32-byte SHA-256 digest of data.
func computeChecksum(data []byte) []byte {
	digest := sha256.Sum256(data)
	return digest[:]
}
||||
|
||||
// validateEntropyBitSize ensures that entropy is the correct size for being a
|
||||
// mnemonic.
|
||||
func validateEntropyBitSize(bitSize int) error { |
||||
if (bitSize%32) != 0 || bitSize < 128 || bitSize > 256 { |
||||
return ErrEntropyLengthInvalid |
||||
} |
||||
return nil |
||||
} |
||||
|
||||
// padByteSlice left-pads slice with zero bytes up to the given length and
// returns the padded copy. When slice is already at least length bytes long
// it is returned unchanged (and not copied).
func padByteSlice(slice []byte, length int) []byte {
	if len(slice) >= length {
		return slice
	}
	padded := make([]byte, length)
	copy(padded[length-len(slice):], slice)
	return padded
}
||||
|
||||
// compareByteSlices reports whether a and b have the same length and the same
// bytes at every position. A nil slice and an empty slice compare equal.
func compareByteSlices(a, b []byte) bool {
	return string(a) == string(b)
}
||||
|
||||
// splitMnemonicWords splits the mnemonic on whitespace and returns the words
// together with true when their count is 12, 15, 18, 21 or 24. For any other
// count it returns nil and false.
func splitMnemonicWords(mnemonic string) ([]string, bool) {
	fields := strings.Fields(mnemonic)
	switch len(fields) {
	case 12, 15, 18, 21, 24:
		return fields, true
	default:
		return nil, false
	}
}
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,24 @@ |
||||
Copyright (c) 2014, tang0th |
||||
All rights reserved. |
||||
|
||||
Redistribution and use in source and binary forms, with or without |
||||
modification, are permitted provided that the following conditions are met: |
||||
* Redistributions of source code must retain the above copyright |
||||
notice, this list of conditions and the following disclaimer. |
||||
* Redistributions in binary form must reproduce the above copyright |
||||
notice, this list of conditions and the following disclaimer in the |
||||
documentation and/or other materials provided with the distribution. |
||||
* Neither the name of tang0th nor the names of its contributors may be |
||||
used to endorse or promote products derived from this software without |
||||
specific prior written permission. |
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND |
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED |
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY |
||||
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES |
||||
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; |
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND |
||||
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS |
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
@ -0,0 +1,19 @@ |
||||
# ECDH |
||||
|
||||
[![Build Status](https://travis-ci.org/wsddn/go-ecdh.svg?branch=master)](https://travis-ci.org/wsddn/go-ecdh) |
||||
|
||||
This is a Go implementation of the elliptic-curve Diffie-Hellman (ECDH) key exchange method.
||||
It supports the NIST curves (and any curves using the `elliptic.Curve` go interface) |
||||
as well as djb's curve25519. |
||||
|
||||
The library handles generating of keys, generating a shared secret, and the |
||||
(un)marshalling of the elliptical curve keys into slices of bytes. |
||||
|
||||
## Warning and Disclaimer |
||||
I am not a cryptographer, this was written as part of a personal project to learn about cryptographic systems and protocols. No claims as to the security of this library are made, I would not advise using it for anything that requires any level of security. Pull requests or issues about security flaws are however still welcome. |
||||
|
||||
## Compatibility |
||||
Works with go 1.2 onwards. |
||||
|
||||
## TODO |
||||
* Improve documentation |
@ -0,0 +1,62 @@ |
||||
package ecdh |
||||
|
||||
import ( |
||||
"crypto" |
||||
"io" |
||||
|
||||
"golang.org/x/crypto/curve25519" |
||||
) |
||||
|
||||
type curve25519ECDH struct { |
||||
ECDH |
||||
} |
||||
|
||||
// NewCurve25519ECDH creates a new ECDH instance that uses djb's curve25519
|
||||
// elliptical curve.
|
||||
func NewCurve25519ECDH() ECDH { |
||||
return &curve25519ECDH{} |
||||
} |
||||
|
||||
func (e *curve25519ECDH) GenerateKey(rand io.Reader) (crypto.PrivateKey, crypto.PublicKey, error) { |
||||
var pub, priv [32]byte |
||||
var err error |
||||
|
||||
_, err = io.ReadFull(rand, priv[:]) |
||||
if err != nil { |
||||
return nil, nil, err |
||||
} |
||||
|
||||
priv[0] &= 248 |
||||
priv[31] &= 127 |
||||
priv[31] |= 64 |
||||
|
||||
curve25519.ScalarBaseMult(&pub, &priv) |
||||
|
||||
return &priv, &pub, nil |
||||
} |
||||
|
||||
func (e *curve25519ECDH) Marshal(p crypto.PublicKey) []byte { |
||||
pub := p.(*[32]byte) |
||||
return pub[:] |
||||
} |
||||
|
||||
func (e *curve25519ECDH) Unmarshal(data []byte) (crypto.PublicKey, bool) { |
||||
var pub [32]byte |
||||
if len(data) != 32 { |
||||
return nil, false |
||||
} |
||||
|
||||
copy(pub[:], data) |
||||
return &pub, true |
||||
} |
||||
|
||||
func (e *curve25519ECDH) GenerateSharedSecret(privKey crypto.PrivateKey, pubKey crypto.PublicKey) ([]byte, error) { |
||||
var priv, pub, secret *[32]byte |
||||
|
||||
priv = privKey.(*[32]byte) |
||||
pub = pubKey.(*[32]byte) |
||||
secret = new([32]byte) |
||||
|
||||
curve25519.ScalarMult(secret, priv, pub) |
||||
return secret[:], nil |
||||
} |
@ -0,0 +1,14 @@ |
||||
package ecdh |
||||
|
||||
import ( |
||||
"crypto" |
||||
"io" |
||||
) |
||||
|
||||
// ECDH is the main interface for ECDH key exchange.
type ECDH interface {
	// GenerateKey creates a private/public key pair using the given source
	// of randomness.
	GenerateKey(rand io.Reader) (crypto.PrivateKey, crypto.PublicKey, error)
	// Marshal encodes a public key as bytes.
	Marshal(pub crypto.PublicKey) []byte
	// Unmarshal decodes a public key; the bool reports success.
	Unmarshal(data []byte) (crypto.PublicKey, bool)
	// GenerateSharedSecret derives the shared secret from a private key and
	// a peer's public key.
	GenerateSharedSecret(priv crypto.PrivateKey, pub crypto.PublicKey) ([]byte, error)
}
@ -0,0 +1,87 @@ |
||||
package ecdh |
||||
|
||||
import ( |
||||
"crypto" |
||||
"crypto/elliptic" |
||||
"io" |
||||
"math/big" |
||||
) |
||||
|
||||
type ellipticECDH struct { |
||||
ECDH |
||||
curve elliptic.Curve |
||||
} |
||||
|
||||
// ellipticPublicKey is a public key: a point (X, Y) together with the curve
// it belongs to.
type ellipticPublicKey struct {
	elliptic.Curve
	X, Y *big.Int
}
||||
|
||||
// ellipticPrivateKey is a private key, holding the scalar bytes D as
// produced by elliptic.GenerateKey.
type ellipticPrivateKey struct {
	D []byte
}
||||
|
||||
// NewEllipticECDH creates a new instance of ECDH with the given elliptic.Curve curve
|
||||
// to use as the elliptical curve for elliptical curve diffie-hellman.
|
||||
func NewEllipticECDH(curve elliptic.Curve) ECDH { |
||||
return &ellipticECDH{ |
||||
curve: curve, |
||||
} |
||||
} |
||||
|
||||
func (e *ellipticECDH) GenerateKey(rand io.Reader) (crypto.PrivateKey, crypto.PublicKey, error) { |
||||
var d []byte |
||||
var x, y *big.Int |
||||
var priv *ellipticPrivateKey |
||||
var pub *ellipticPublicKey |
||||
var err error |
||||
|
||||
d, x, y, err = elliptic.GenerateKey(e.curve, rand) |
||||
if err != nil { |
||||
return nil, nil, err |
||||
} |
||||
|
||||
priv = &ellipticPrivateKey{ |
||||
D: d, |
||||
} |
||||
pub = &ellipticPublicKey{ |
||||
Curve: e.curve, |
||||
X: x, |
||||
Y: y, |
||||
} |
||||
|
||||
return priv, pub, nil |
||||
} |
||||
|
||||
func (e *ellipticECDH) Marshal(p crypto.PublicKey) []byte { |
||||
pub := p.(*ellipticPublicKey) |
||||
return elliptic.Marshal(e.curve, pub.X, pub.Y) |
||||
} |
||||
|
||||
func (e *ellipticECDH) Unmarshal(data []byte) (crypto.PublicKey, bool) { |
||||
var key *ellipticPublicKey |
||||
var x, y *big.Int |
||||
|
||||
x, y = elliptic.Unmarshal(e.curve, data) |
||||
if x == nil || y == nil { |
||||
return key, false |
||||
} |
||||
key = &ellipticPublicKey{ |
||||
Curve: e.curve, |
||||
X: x, |
||||
Y: y, |
||||
} |
||||
return key, true |
||||
} |
||||
|
||||
// GenerateSharedSecret takes in a public key and a private key
|
||||
// and generates a shared secret.
|
||||
//
|
||||
// RFC5903 Section 9 states we should only return x.
|
||||
func (e *ellipticECDH) GenerateSharedSecret(privKey crypto.PrivateKey, pubKey crypto.PublicKey) ([]byte, error) { |
||||
priv := privKey.(*ellipticPrivateKey) |
||||
pub := pubKey.(*ellipticPublicKey) |
||||
|
||||
x, _ := e.curve.ScalarMult(pub.X, pub.Y, priv.D) |
||||
return x.Bytes(), nil |
||||
} |
@ -0,0 +1,512 @@ |
||||
// Copyright 2011 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package norm |
||||
|
||||
import "unicode/utf8" |
||||
|
||||
const (
	// maxNonStarters is the largest run of non-starters allowed in a segment.
	maxNonStarters = 30
	// The maximum number of characters needed for a buffer is
	// maxNonStarters + 1 for the starter + 1 for the CGJ.
	maxBufferSize    = maxNonStarters + 2
	maxNFCExpansion  = 3  // NFC(0x1D160)
	maxNFKCExpansion = 18 // NFKC(0xFDFA)

	// Every buffered rune gets a full utf8.UTFMax-byte slot.
	maxByteBufferSize = utf8.UTFMax * maxBufferSize // 128
)
||||
|
||||
// ssState reports the state of a segment after a rune has been offered to a
// streamSafe. It is the result type of streamSafe.next and
// streamSafe.backwards.
type ssState int

const (
	// ssSuccess: the rune was successfully added to the segment.
	ssSuccess ssState = iota
	// ssStarter: the rune starts a new segment and should not be added.
	ssStarter
	// ssOverflow: the rune caused a segment overflow and a CGJ should be
	// inserted.
	ssOverflow
)
||||
|
||||
// streamSafe counts non-starters within a segment and thereby implements the
// policy of when a CGJ should be inserted.
type streamSafe uint8
||||
|
||||
// first inserts the first rune of a segment. It is a faster version of next if
|
||||
// it is known p represents the first rune in a segment.
|
||||
func (ss *streamSafe) first(p Properties) { |
||||
*ss = streamSafe(p.nTrailingNonStarters()) |
||||
} |
||||
|
||||
// insert returns a ssState value to indicate whether a rune represented by p
|
||||
// can be inserted.
|
||||
func (ss *streamSafe) next(p Properties) ssState { |
||||
if *ss > maxNonStarters { |
||||
panic("streamSafe was not reset") |
||||
} |
||||
n := p.nLeadingNonStarters() |
||||
if *ss += streamSafe(n); *ss > maxNonStarters { |
||||
*ss = 0 |
||||
return ssOverflow |
||||
} |
||||
// The Stream-Safe Text Processing prescribes that the counting can stop
|
||||
// as soon as a starter is encountered. However, there are some starters,
|
||||
// like Jamo V and T, that can combine with other runes, leaving their
|
||||
// successive non-starters appended to the previous, possibly causing an
|
||||
// overflow. We will therefore consider any rune with a non-zero nLead to
|
||||
// be a non-starter. Note that it always hold that if nLead > 0 then
|
||||
// nLead == nTrail.
|
||||
if n == 0 { |
||||
*ss = streamSafe(p.nTrailingNonStarters()) |
||||
return ssStarter |
||||
} |
||||
return ssSuccess |
||||
} |
||||
|
||||
// backwards is used for checking for overflow and segment starts
|
||||
// when traversing a string backwards. Users do not need to call first
|
||||
// for the first rune. The state of the streamSafe retains the count of
|
||||
// the non-starters loaded.
|
||||
func (ss *streamSafe) backwards(p Properties) ssState { |
||||
if *ss > maxNonStarters { |
||||
panic("streamSafe was not reset") |
||||
} |
||||
c := *ss + streamSafe(p.nTrailingNonStarters()) |
||||
if c > maxNonStarters { |
||||
return ssOverflow |
||||
} |
||||
*ss = c |
||||
if p.nLeadingNonStarters() == 0 { |
||||
return ssStarter |
||||
} |
||||
return ssSuccess |
||||
} |
||||
|
||||
func (ss streamSafe) isMax() bool { |
||||
return ss == maxNonStarters |
||||
} |
||||
|
||||
// GraphemeJoiner is the Combining Grapheme Joiner (U+034F). It is inserted
// after maxNonStarters non-starter runes.
const GraphemeJoiner = "\u034F"
||||
|
||||
// reorderBuffer is used to normalize a single segment. Characters inserted with
|
||||
// insert are decomposed and reordered based on CCC. The compose method can
|
||||
// be used to recombine characters. Note that the byte buffer does not hold
|
||||
// the UTF-8 characters in order. Only the rune array is maintained in sorted
|
||||
// order. flush writes the resulting segment to a byte array.
|
||||
type reorderBuffer struct { |
||||
rune [maxBufferSize]Properties // Per character info.
|
||||
byte [maxByteBufferSize]byte // UTF-8 buffer. Referenced by runeInfo.pos.
|
||||
nbyte uint8 // Number or bytes.
|
||||
ss streamSafe // For limiting length of non-starter sequence.
|
||||
nrune int // Number of runeInfos.
|
||||
f formInfo |
||||
|
||||
src input |
||||
nsrc int |
||||
tmpBytes input |
||||
|
||||
out []byte |
||||
flushF func(*reorderBuffer) bool |
||||
} |
||||
|
||||
func (rb *reorderBuffer) init(f Form, src []byte) { |
||||
rb.f = *formTable[f] |
||||
rb.src.setBytes(src) |
||||
rb.nsrc = len(src) |
||||
rb.ss = 0 |
||||
} |
||||
|
||||
func (rb *reorderBuffer) initString(f Form, src string) { |
||||
rb.f = *formTable[f] |
||||
rb.src.setString(src) |
||||
rb.nsrc = len(src) |
||||
rb.ss = 0 |
||||
} |
||||
|
||||
func (rb *reorderBuffer) setFlusher(out []byte, f func(*reorderBuffer) bool) { |
||||
rb.out = out |
||||
rb.flushF = f |
||||
} |
||||
|
||||
// reset discards all characters from the buffer.
|
||||
func (rb *reorderBuffer) reset() { |
||||
rb.nrune = 0 |
||||
rb.nbyte = 0 |
||||
} |
||||
|
||||
func (rb *reorderBuffer) doFlush() bool { |
||||
if rb.f.composing { |
||||
rb.compose() |
||||
} |
||||
res := rb.flushF(rb) |
||||
rb.reset() |
||||
return res |
||||
} |
||||
|
||||
// appendFlush appends the normalized segment to rb.out.
|
||||
func appendFlush(rb *reorderBuffer) bool { |
||||
for i := 0; i < rb.nrune; i++ { |
||||
start := rb.rune[i].pos |
||||
end := start + rb.rune[i].size |
||||
rb.out = append(rb.out, rb.byte[start:end]...) |
||||
} |
||||
return true |
||||
} |
||||
|
||||
// flush appends the normalized segment to out and resets rb.
|
||||
func (rb *reorderBuffer) flush(out []byte) []byte { |
||||
for i := 0; i < rb.nrune; i++ { |
||||
start := rb.rune[i].pos |
||||
end := start + rb.rune[i].size |
||||
out = append(out, rb.byte[start:end]...) |
||||
} |
||||
rb.reset() |
||||
return out |
||||
} |
||||
|
||||
// flushCopy copies the normalized segment to buf and resets rb.
|
||||
// It returns the number of bytes written to buf.
|
||||
func (rb *reorderBuffer) flushCopy(buf []byte) int { |
||||
p := 0 |
||||
for i := 0; i < rb.nrune; i++ { |
||||
runep := rb.rune[i] |
||||
p += copy(buf[p:], rb.byte[runep.pos:runep.pos+runep.size]) |
||||
} |
||||
rb.reset() |
||||
return p |
||||
} |
||||
|
||||
// insertOrdered inserts a rune in the buffer, ordered by Canonical Combining Class.
|
||||
// It returns false if the buffer is not large enough to hold the rune.
|
||||
// It is used internally by insert and insertString only.
|
||||
func (rb *reorderBuffer) insertOrdered(info Properties) { |
||||
n := rb.nrune |
||||
b := rb.rune[:] |
||||
cc := info.ccc |
||||
if cc > 0 { |
||||
// Find insertion position + move elements to make room.
|
||||
for ; n > 0; n-- { |
||||
if b[n-1].ccc <= cc { |
||||
break |
||||
} |
||||
b[n] = b[n-1] |
||||
} |
||||
} |
||||
rb.nrune += 1 |
||||
pos := uint8(rb.nbyte) |
||||
rb.nbyte += utf8.UTFMax |
||||
info.pos = pos |
||||
b[n] = info |
||||
} |
||||
|
||||
// insertErr is an error code returned by insert. Using this type instead
// of error improves performance up to 20% for many of the benchmarks.
type insertErr int

// iSuccess is zero; the failure codes are negative.
const (
	iSuccess insertErr = -iota
	iShortDst
	iShortSrc
)
||||
|
||||
// insertFlush inserts the given rune in the buffer ordered by CCC.
|
||||
// If a decomposition with multiple segments are encountered, they leading
|
||||
// ones are flushed.
|
||||
// It returns a non-zero error code if the rune was not inserted.
|
||||
func (rb *reorderBuffer) insertFlush(src input, i int, info Properties) insertErr { |
||||
if rune := src.hangul(i); rune != 0 { |
||||
rb.decomposeHangul(rune) |
||||
return iSuccess |
||||
} |
||||
if info.hasDecomposition() { |
||||
return rb.insertDecomposed(info.Decomposition()) |
||||
} |
||||
rb.insertSingle(src, i, info) |
||||
return iSuccess |
||||
} |
||||
|
||||
// insertUnsafe inserts the given rune in the buffer ordered by CCC.
|
||||
// It is assumed there is sufficient space to hold the runes. It is the
|
||||
// responsibility of the caller to ensure this. This can be done by checking
|
||||
// the state returned by the streamSafe type.
|
||||
func (rb *reorderBuffer) insertUnsafe(src input, i int, info Properties) { |
||||
if rune := src.hangul(i); rune != 0 { |
||||
rb.decomposeHangul(rune) |
||||
} |
||||
if info.hasDecomposition() { |
||||
// TODO: inline.
|
||||
rb.insertDecomposed(info.Decomposition()) |
||||
} else { |
||||
rb.insertSingle(src, i, info) |
||||
} |
||||
} |
||||
|
||||
// insertDecomposed inserts an entry in to the reorderBuffer for each rune
|
||||
// in dcomp. dcomp must be a sequence of decomposed UTF-8-encoded runes.
|
||||
// It flushes the buffer on each new segment start.
|
||||
func (rb *reorderBuffer) insertDecomposed(dcomp []byte) insertErr { |
||||
rb.tmpBytes.setBytes(dcomp) |
||||
// As the streamSafe accounting already handles the counting for modifiers,
|
||||
// we don't have to call next. However, we do need to keep the accounting
|
||||
// intact when flushing the buffer.
|
||||
for i := 0; i < len(dcomp); { |
||||
info := rb.f.info(rb.tmpBytes, i) |
||||
if info.BoundaryBefore() && rb.nrune > 0 && !rb.doFlush() { |
||||
return iShortDst |
||||
} |
||||
i += copy(rb.byte[rb.nbyte:], dcomp[i:i+int(info.size)]) |
||||
rb.insertOrdered(info) |
||||
} |
||||
return iSuccess |
||||
} |
||||
|
||||
// insertSingle inserts an entry in the reorderBuffer for the rune at
|
||||
// position i. info is the runeInfo for the rune at position i.
|
||||
func (rb *reorderBuffer) insertSingle(src input, i int, info Properties) { |
||||
src.copySlice(rb.byte[rb.nbyte:], i, i+int(info.size)) |
||||
rb.insertOrdered(info) |
||||
} |
||||
|
||||
// insertCGJ inserts a Combining Grapheme Joiner (0x034f) into rb.
|
||||
func (rb *reorderBuffer) insertCGJ() { |
||||
rb.insertSingle(input{str: GraphemeJoiner}, 0, Properties{size: uint8(len(GraphemeJoiner))}) |
||||
} |
||||
|
||||
// appendRune inserts a rune at the end of the buffer. It is used for Hangul.
|
||||
func (rb *reorderBuffer) appendRune(r rune) { |
||||
bn := rb.nbyte |
||||
sz := utf8.EncodeRune(rb.byte[bn:], rune(r)) |
||||
rb.nbyte += utf8.UTFMax |
||||
rb.rune[rb.nrune] = Properties{pos: bn, size: uint8(sz)} |
||||
rb.nrune++ |
||||
} |
||||
|
||||
// assignRune sets a rune at position pos. It is used for Hangul and recomposition.
|
||||
func (rb *reorderBuffer) assignRune(pos int, r rune) { |
||||
bn := rb.rune[pos].pos |
||||
sz := utf8.EncodeRune(rb.byte[bn:], rune(r)) |
||||
rb.rune[pos] = Properties{pos: bn, size: uint8(sz)} |
||||
} |
||||
|
||||
// runeAt returns the rune at position n. It is used for Hangul and recomposition.
|
||||
func (rb *reorderBuffer) runeAt(n int) rune { |
||||
inf := rb.rune[n] |
||||
r, _ := utf8.DecodeRune(rb.byte[inf.pos : inf.pos+inf.size]) |
||||
return r |
||||
} |
||||
|
||||
// bytesAt returns the UTF-8 encoding of the rune at position n.
|
||||
// It is used for Hangul and recomposition.
|
||||
func (rb *reorderBuffer) bytesAt(n int) []byte { |
||||
inf := rb.rune[n] |
||||
return rb.byte[inf.pos : int(inf.pos)+int(inf.size)] |
||||
} |
||||
|
||||
// For Hangul we combine algorithmically, instead of using tables.
const (
	hangulBase  = 0xAC00 // UTF-8(hangulBase) -> EA B0 80
	hangulBase0 = 0xEA
	hangulBase1 = 0xB0
	hangulBase2 = 0x80

	hangulEnd  = hangulBase + jamoLVTCount // UTF-8(0xD7A4) -> ED 9E A4
	hangulEnd0 = 0xED
	hangulEnd1 = 0x9E
	hangulEnd2 = 0xA4

	jamoLBase  = 0x1100 // UTF-8(jamoLBase) -> E1 84 80
	jamoLBase0 = 0xE1
	jamoLBase1 = 0x84
	jamoLEnd   = 0x1113
	jamoVBase  = 0x1161
	jamoVEnd   = 0x1176
	jamoTBase  = 0x11A7
	jamoTEnd   = 0x11C3

	jamoTCount   = 28
	jamoVCount   = 21
	jamoVTCount  = 21 * 28
	jamoLVTCount = 19 * 21 * 28
)
||||
|
||||
// hangulUTF8Size is the minimum number of bytes isHangul and isHangulString
// require before inspecting their input.
const hangulUTF8Size = 3
||||
|
||||
func isHangul(b []byte) bool { |
||||
if len(b) < hangulUTF8Size { |
||||
return false |
||||
} |
||||
b0 := b[0] |
||||
if b0 < hangulBase0 { |
||||
return false |
||||
} |
||||
b1 := b[1] |
||||
switch { |
||||
case b0 == hangulBase0: |
||||
return b1 >= hangulBase1 |
||||
case b0 < hangulEnd0: |
||||
return true |
||||
case b0 > hangulEnd0: |
||||
return false |
||||
case b1 < hangulEnd1: |
||||
return true |
||||
} |
||||
return b1 == hangulEnd1 && b[2] < hangulEnd2 |
||||
} |
||||
|
||||
func isHangulString(b string) bool { |
||||
if len(b) < hangulUTF8Size { |
||||
return false |
||||
} |
||||
b0 := b[0] |
||||
if b0 < hangulBase0 { |
||||
return false |
||||
} |
||||
b1 := b[1] |
||||
switch { |
||||
case b0 == hangulBase0: |
||||
return b1 >= hangulBase1 |
||||
case b0 < hangulEnd0: |
||||
return true |
||||
case b0 > hangulEnd0: |
||||
return false |
||||
case b1 < hangulEnd1: |
||||
return true |
||||
} |
||||
return b1 == hangulEnd1 && b[2] < hangulEnd2 |
||||
} |
||||
|
||||
// Caller must ensure len(b) >= 2.
|
||||
func isJamoVT(b []byte) bool { |
||||
// True if (rune & 0xff00) == jamoLBase
|
||||
return b[0] == jamoLBase0 && (b[1]&0xFC) == jamoLBase1 |
||||
} |
||||
|
||||
func isHangulWithoutJamoT(b []byte) bool { |
||||
c, _ := utf8.DecodeRune(b) |
||||
c -= hangulBase |
||||
return c < jamoLVTCount && c%jamoTCount == 0 |
||||
} |
||||
|
||||
// decomposeHangul writes the decomposed Hangul to buf and returns the number
|
||||
// of bytes written. len(buf) should be at least 9.
|
||||
func decomposeHangul(buf []byte, r rune) int { |
||||
const JamoUTF8Len = 3 |
||||
r -= hangulBase |
||||
x := r % jamoTCount |
||||
r /= jamoTCount |
||||
utf8.EncodeRune(buf, jamoLBase+r/jamoVCount) |
||||
utf8.EncodeRune(buf[JamoUTF8Len:], jamoVBase+r%jamoVCount) |
||||
if x != 0 { |
||||
utf8.EncodeRune(buf[2*JamoUTF8Len:], jamoTBase+x) |
||||
return 3 * JamoUTF8Len |
||||
} |
||||
return 2 * JamoUTF8Len |
||||
} |
||||
|
||||
// decomposeHangul algorithmically decomposes a Hangul rune into
|
||||
// its Jamo components.
|
||||
// See https://unicode.org/reports/tr15/#Hangul for details on decomposing Hangul.
|
||||
func (rb *reorderBuffer) decomposeHangul(r rune) { |
||||
r -= hangulBase |
||||
x := r % jamoTCount |
||||
r /= jamoTCount |
||||
rb.appendRune(jamoLBase + r/jamoVCount) |
||||
rb.appendRune(jamoVBase + r%jamoVCount) |
||||
if x != 0 { |
||||
rb.appendRune(jamoTBase + x) |
||||
} |
||||
} |
||||
|
||||
// combineHangul algorithmically combines Jamo character components into Hangul.
|
||||
// See https://unicode.org/reports/tr15/#Hangul for details on combining Hangul.
|
||||
func (rb *reorderBuffer) combineHangul(s, i, k int) { |
||||
b := rb.rune[:] |
||||
bn := rb.nrune |
||||
for ; i < bn; i++ { |
||||
cccB := b[k-1].ccc |
||||
cccC := b[i].ccc |
||||
if cccB == 0 { |
||||
s = k - 1 |
||||
} |
||||
if s != k-1 && cccB >= cccC { |
||||
// b[i] is blocked by greater-equal cccX below it
|
||||
b[k] = b[i] |
||||
k++ |
||||
} else { |
||||
l := rb.runeAt(s) // also used to compare to hangulBase
|
||||
v := rb.runeAt(i) // also used to compare to jamoT
|
||||
switch { |
||||
case jamoLBase <= l && l < jamoLEnd && |
||||
jamoVBase <= v && v < jamoVEnd: |
||||
// 11xx plus 116x to LV
|
||||
rb.assignRune(s, hangulBase+ |
||||
(l-jamoLBase)*jamoVTCount+(v-jamoVBase)*jamoTCount) |
||||
case hangulBase <= l && l < hangulEnd && |
||||
jamoTBase < v && v < jamoTEnd && |
||||
((l-hangulBase)%jamoTCount) == 0: |
||||
// ACxx plus 11Ax to LVT
|
||||
rb.assignRune(s, l+v-jamoTBase) |
||||
default: |
||||
b[k] = b[i] |
||||
k++ |
||||
} |
||||
} |
||||
} |
||||
rb.nrune = k |
||||
} |
||||
|
||||
// compose recombines the runes in the buffer.
|
||||
// It should only be used to recompose a single segment, as it will not
|
||||
// handle alternations between Hangul and non-Hangul characters correctly.
|
||||
func (rb *reorderBuffer) compose() { |
||||
// Lazily load the map used by the combine func below, but do
|
||||
// it outside of the loop.
|
||||
recompMapOnce.Do(buildRecompMap) |
||||
|
||||
// UAX #15, section X5 , including Corrigendum #5
|
||||
// "In any character sequence beginning with starter S, a character C is
|
||||
// blocked from S if and only if there is some character B between S
|
||||
// and C, and either B is a starter or it has the same or higher
|
||||
// combining class as C."
|
||||
bn := rb.nrune |
||||
if bn == 0 { |
||||
return |
||||
} |
||||
k := 1 |
||||
b := rb.rune[:] |
||||
for s, i := 0, 1; i < bn; i++ { |
||||
if isJamoVT(rb.bytesAt(i)) { |
||||
// Redo from start in Hangul mode. Necessary to support
|
||||
// U+320E..U+321E in NFKC mode.
|
||||
rb.combineHangul(s, i, k) |
||||
return |
||||
} |
||||
ii := b[i] |
||||
// We can only use combineForward as a filter if we later
|
||||
// get the info for the combined character. This is more
|
||||
// expensive than using the filter. Using combinesBackward()
|
||||
// is safe.
|
||||
if ii.combinesBackward() { |
||||
cccB := b[k-1].ccc |
||||
cccC := ii.ccc |
||||
blocked := false // b[i] blocked by starter or greater or equal CCC?
|
||||
if cccB == 0 { |
||||
s = k - 1 |
||||
} else { |
||||
blocked = s != k-1 && cccB >= cccC |
||||
} |
||||
if !blocked { |
||||
combined := combine(rb.runeAt(s), rb.runeAt(i)) |
||||
if combined != 0 { |
||||
rb.assignRune(s, combined) |
||||
continue |
||||
} |
||||
} |
||||
} |
||||
b[k] = b[i] |
||||
k++ |
||||
} |
||||
rb.nrune = k |
||||
} |
@ -0,0 +1,278 @@ |
||||
// Copyright 2011 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package norm |
||||
|
||||
import "encoding/binary" |
||||
|
||||
// This file contains Form-specific logic and wrappers for data in tables.go.
|
||||
|
||||
// Rune info is stored in a separate trie per composing form. A composing form
|
||||
// and its corresponding decomposing form share the same trie. Each trie maps
|
||||
// a rune to a uint16. The values take two forms. For v >= 0x8000:
|
||||
// bits
|
||||
// 15: 1 (inverse of NFD_QC bit of qcInfo)
|
||||
// 13..7: qcInfo (see below). isYesD is always true (no decomposition).
|
||||
// 6..0: ccc (compressed CCC value).
|
||||
// For v < 0x8000, the respective rune has a decomposition and v is an index
|
||||
// into a byte array of UTF-8 decomposition sequences and additional info and
|
||||
// has the form:
|
||||
// <header> <decomp_byte>* [<tccc> [<lccc>]]
|
||||
// The header contains the number of bytes in the decomposition (excluding this
|
||||
// length byte). The two most significant bits of this length byte correspond
|
||||
// to bit 5 and 4 of qcInfo (see below). The byte sequence itself starts at v+1.
|
||||
// The byte sequence is followed by a trailing and leading CCC if the values
|
||||
// for these are not zero. The value of v determines which ccc are appended
|
||||
// to the sequences. For v < firstCCC, there are none, for v >= firstCCC,
|
||||
// the sequence is followed by a trailing ccc, and for v >= firstLeadingCCC
|
||||
// there is an additional leading ccc. The value of tccc itself is the
|
||||
// trailing CCC shifted left 2 bits. The two least-significant bits of tccc
|
||||
// are the number of trailing non-starters.
|
||||
|
||||
// Bit masks for the packed quick check byte and for the header byte that
// precedes each decomposition.
const (
	// qcInfoMask clears all but the relevant bits in a qcInfo.
	qcInfoMask = 0x3F
	// headerLenMask extracts the length value from the header byte.
	headerLenMask = 0x3F
	// headerFlagsMask extracts the qcInfo bits from the header byte.
	headerFlagsMask = 0xC0
)
||||
|
||||
// Properties provides access to normalization properties of a rune.
|
||||
type Properties struct { |
||||
pos uint8 // start position in reorderBuffer; used in composition.go
|
||||
size uint8 // length of UTF-8 encoding of this rune
|
||||
ccc uint8 // leading canonical combining class (ccc if not decomposition)
|
||||
tccc uint8 // trailing canonical combining class (ccc if not decomposition)
|
||||
nLead uint8 // number of leading non-starters.
|
||||
flags qcInfo // quick check flags
|
||||
index uint16 |
||||
} |
||||
|
||||
// functions dispatchable per form
|
||||
type lookupFunc func(b input, i int) Properties |
||||
|
||||
// formInfo holds Form-specific functions and tables.
|
||||
type formInfo struct { |
||||
form Form |
||||
composing, compatibility bool // form type
|
||||
info lookupFunc |
||||
nextMain iterFunc |
||||
} |
||||
|
||||
var formTable = []*formInfo{{ |
||||
form: NFC, |
||||
composing: true, |
||||
compatibility: false, |
||||
info: lookupInfoNFC, |
||||
nextMain: nextComposed, |
||||
}, { |
||||
form: NFD, |
||||
composing: false, |
||||
compatibility: false, |
||||
info: lookupInfoNFC, |
||||
nextMain: nextDecomposed, |
||||
}, { |
||||
form: NFKC, |
||||
composing: true, |
||||
compatibility: true, |
||||
info: lookupInfoNFKC, |
||||
nextMain: nextComposed, |
||||
}, { |
||||
form: NFKD, |
||||
composing: false, |
||||
compatibility: true, |
||||
info: lookupInfoNFKC, |
||||
nextMain: nextDecomposed, |
||||
}} |
||||
|
||||
// We do not distinguish between boundaries for NFC, NFD, etc. to avoid
|
||||
// unexpected behavior for the user. For example, in NFD, there is a boundary
|
||||
// after 'a'. However, 'a' might combine with modifiers, so from the application's
|
||||
// perspective it is not a good boundary. We will therefore always use the
|
||||
// boundaries for the combining variants.
|
||||
|
||||
// BoundaryBefore returns true if this rune starts a new segment and
|
||||
// cannot combine with any rune on the left.
|
||||
func (p Properties) BoundaryBefore() bool { |
||||
if p.ccc == 0 && !p.combinesBackward() { |
||||
return true |
||||
} |
||||
// We assume that the CCC of the first character in a decomposition
|
||||
// is always non-zero if different from info.ccc and that we can return
|
||||
// false at this point. This is verified by maketables.
|
||||
return false |
||||
} |
||||
|
||||
// BoundaryAfter returns true if runes cannot combine with or otherwise
|
||||
// interact with this or previous runes.
|
||||
func (p Properties) BoundaryAfter() bool { |
||||
// TODO: loosen these conditions.
|
||||
return p.isInert() |
||||
} |
||||
|
||||
// qcInfo holds the packed quick check data of a rune. Six bits are used:
//   5:    Combines forward  (0 == false, 1 == true)
//   4..3: NFC_QC Yes(00), No (10), or Maybe (11)
//   2:    NFD_QC Yes (0) or No (1). No also means there is a decomposition.
//   1..0: Number of trailing non-starters.
//
// When all of these bits are zero (and the ccc is zero too, see isInert),
// the character is inert, meaning it is never influenced by normalization.
type qcInfo uint8
||||
|
||||
func (p Properties) isYesC() bool { return p.flags&0x10 == 0 } |
||||
func (p Properties) isYesD() bool { return p.flags&0x4 == 0 } |
||||
|
||||
func (p Properties) combinesForward() bool { return p.flags&0x20 != 0 } |
||||
func (p Properties) combinesBackward() bool { return p.flags&0x8 != 0 } // == isMaybe
|
||||
func (p Properties) hasDecomposition() bool { return p.flags&0x4 != 0 } // == isNoD
|
||||
|
||||
func (p Properties) isInert() bool { |
||||
return p.flags&qcInfoMask == 0 && p.ccc == 0 |
||||
} |
||||
|
||||
func (p Properties) multiSegment() bool { |
||||
return p.index >= firstMulti && p.index < endMulti |
||||
} |
||||
|
||||
func (p Properties) nLeadingNonStarters() uint8 { |
||||
return p.nLead |
||||
} |
||||
|
||||
func (p Properties) nTrailingNonStarters() uint8 { |
||||
return uint8(p.flags & 0x03) |
||||
} |
||||
|
||||
// Decomposition returns the decomposition for the underlying rune
|
||||
// or nil if there is none.
|
||||
func (p Properties) Decomposition() []byte { |
||||
// TODO: create the decomposition for Hangul?
|
||||
if p.index == 0 { |
||||
return nil |
||||
} |
||||
i := p.index |
||||
n := decomps[i] & headerLenMask |
||||
i++ |
||||
return decomps[i : i+uint16(n)] |
||||
} |
||||
|
||||
// Size returns the length of UTF-8 encoding of the rune.
|
||||
func (p Properties) Size() int { |
||||
return int(p.size) |
||||
} |
||||
|
||||
// CCC returns the canonical combining class of the underlying rune.
|
||||
func (p Properties) CCC() uint8 { |
||||
if p.index >= firstCCCZeroExcept { |
||||
return 0 |
||||
} |
||||
return ccc[p.ccc] |
||||
} |
||||
|
||||
// LeadCCC returns the CCC of the first rune in the decomposition.
|
||||
// If there is no decomposition, LeadCCC equals CCC.
|
||||
func (p Properties) LeadCCC() uint8 { |
||||
return ccc[p.ccc] |
||||
} |
||||
|
||||
// TrailCCC returns the CCC of the last rune in the decomposition.
|
||||
// If there is no decomposition, TrailCCC equals CCC.
|
||||
func (p Properties) TrailCCC() uint8 { |
||||
return ccc[p.tccc] |
||||
} |
||||
|
||||
func buildRecompMap() { |
||||
recompMap = make(map[uint32]rune, len(recompMapPacked)/8) |
||||
var buf [8]byte |
||||
for i := 0; i < len(recompMapPacked); i += 8 { |
||||
copy(buf[:], recompMapPacked[i:i+8]) |
||||
key := binary.BigEndian.Uint32(buf[:4]) |
||||
val := binary.BigEndian.Uint32(buf[4:]) |
||||
recompMap[key] = rune(val) |
||||
} |
||||
} |
||||
|
||||
// Recomposition
|
||||
// We use 32-bit keys instead of 64-bit for the two codepoint keys.
|
||||
// This clips off the bits of three entries, but we know this will not
|
||||
// result in a collision. In the unlikely event that changes to
|
||||
// UnicodeData.txt introduce collisions, the compiler will catch it.
|
||||
// Note that the recomposition map for NFC and NFKC are identical.
|
||||
|
||||
// combine returns the combined rune or 0 if it doesn't exist.
|
||||
//
|
||||
// The caller is responsible for calling
|
||||
// recompMapOnce.Do(buildRecompMap) sometime before this is called.
|
||||
func combine(a, b rune) rune { |
||||
key := uint32(uint16(a))<<16 + uint32(uint16(b)) |
||||
if recompMap == nil { |
||||
panic("caller error") // see func comment
|
||||
} |
||||
return recompMap[key] |
||||
} |
||||
|
||||
func lookupInfoNFC(b input, i int) Properties { |
||||
v, sz := b.charinfoNFC(i) |
||||
return compInfo(v, sz) |
||||
} |
||||
|
||||
func lookupInfoNFKC(b input, i int) Properties { |
||||
v, sz := b.charinfoNFKC(i) |
||||
return compInfo(v, sz) |
||||
} |
||||
|
||||
// Properties returns properties for the first rune in s.
|
||||
func (f Form) Properties(s []byte) Properties { |
||||
if f == NFC || f == NFD { |
||||
return compInfo(nfcData.lookup(s)) |
||||
} |
||||
return compInfo(nfkcData.lookup(s)) |
||||
} |
||||
|
||||
// PropertiesString returns properties for the first rune in s.
|
||||
func (f Form) PropertiesString(s string) Properties { |
||||
if f == NFC || f == NFD { |
||||
return compInfo(nfcData.lookupString(s)) |
||||
} |
||||
return compInfo(nfkcData.lookupString(s)) |
||||
} |
||||
|
||||
// compInfo converts the information contained in v and sz
|
||||
// to a Properties. See the comment at the top of the file
|
||||
// for more information on the format.
|
||||
func compInfo(v uint16, sz int) Properties { |
||||
if v == 0 { |
||||
return Properties{size: uint8(sz)} |
||||
} else if v >= 0x8000 { |
||||
p := Properties{ |
||||
size: uint8(sz), |
||||
ccc: uint8(v), |
||||
tccc: uint8(v), |
||||
flags: qcInfo(v >> 8), |
||||
} |
||||
if p.ccc > 0 || p.combinesBackward() { |
||||
p.nLead = uint8(p.flags & 0x3) |
||||
} |
||||
return p |
||||
} |
||||
// has decomposition
|
||||
h := decomps[v] |
||||
f := (qcInfo(h&headerFlagsMask) >> 2) | 0x4 |
||||
p := Properties{size: uint8(sz), flags: f, index: v} |
||||
if v >= firstCCC { |
||||
v += uint16(h&headerLenMask) + 1 |
||||
c := decomps[v] |
||||
p.tccc = c >> 2 |
||||
p.flags |= qcInfo(c & 0x3) |
||||
if v >= firstLeadingCCC { |
||||
p.nLead = c & 0x3 |
||||
if v >= firstStarterWithNLead { |
||||
// We were tricked. Remove the decomposition.
|
||||
p.flags &= 0x03 |
||||
p.index = 0 |
||||
return p |
||||
} |
||||
p.ccc = decomps[v+1] |
||||
} |
||||
} |
||||
return p |
||||
} |
@ -0,0 +1,109 @@ |
||||
// Copyright 2011 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package norm |
||||
|
||||
import "unicode/utf8" |
||||
|
||||
// input wraps either a string or a byte slice as the source of text. The
// methods on input read from bytes when it is non-nil and from str
// otherwise.
type input struct {
	str   string
	bytes []byte
}
||||
|
||||
func inputBytes(str []byte) input { |
||||
return input{bytes: str} |
||||
} |
||||
|
||||
func inputString(str string) input { |
||||
return input{str: str} |
||||
} |
||||
|
||||
func (in *input) setBytes(str []byte) { |
||||
in.str = "" |
||||
in.bytes = str |
||||
} |
||||
|
||||
func (in *input) setString(str string) { |
||||
in.str = str |
||||
in.bytes = nil |
||||
} |
||||
|
||||
func (in *input) _byte(p int) byte { |
||||
if in.bytes == nil { |
||||
return in.str[p] |
||||
} |
||||
return in.bytes[p] |
||||
} |
||||
|
||||
func (in *input) skipASCII(p, max int) int { |
||||
if in.bytes == nil { |
||||
for ; p < max && in.str[p] < utf8.RuneSelf; p++ { |
||||
} |
||||
} else { |
||||
for ; p < max && in.bytes[p] < utf8.RuneSelf; p++ { |
||||
} |
||||
} |
||||
return p |
||||
} |
||||
|
||||
func (in *input) skipContinuationBytes(p int) int { |
||||
if in.bytes == nil { |
||||
for ; p < len(in.str) && !utf8.RuneStart(in.str[p]); p++ { |
||||
} |
||||
} else { |
||||
for ; p < len(in.bytes) && !utf8.RuneStart(in.bytes[p]); p++ { |
||||
} |
||||
} |
||||
return p |
||||
} |
||||
|
||||
func (in *input) appendSlice(buf []byte, b, e int) []byte { |
||||
if in.bytes != nil { |
||||
return append(buf, in.bytes[b:e]...) |
||||
} |
||||
for i := b; i < e; i++ { |
||||
buf = append(buf, in.str[i]) |
||||
} |
||||
return buf |
||||
} |
||||
|
||||
func (in *input) copySlice(buf []byte, b, e int) int { |
||||
if in.bytes == nil { |
||||
return copy(buf, in.str[b:e]) |
||||
} |
||||
return copy(buf, in.bytes[b:e]) |
||||
} |
||||
|
||||
func (in *input) charinfoNFC(p int) (uint16, int) { |
||||
if in.bytes == nil { |
||||
return nfcData.lookupString(in.str[p:]) |
||||
} |
||||
return nfcData.lookup(in.bytes[p:]) |
||||
} |
||||
|
||||
func (in *input) charinfoNFKC(p int) (uint16, int) { |
||||
if in.bytes == nil { |
||||
return nfkcData.lookupString(in.str[p:]) |
||||
} |
||||
return nfkcData.lookup(in.bytes[p:]) |
||||
} |
||||
|
||||
func (in *input) hangul(p int) (r rune) { |
||||
var size int |
||||
if in.bytes == nil { |
||||
if !isHangulString(in.str[p:]) { |
||||
return 0 |
||||
} |
||||
r, size = utf8.DecodeRuneInString(in.str[p:]) |
||||
} else { |
||||
if !isHangul(in.bytes[p:]) { |
||||
return 0 |
||||
} |
||||
r, size = utf8.DecodeRune(in.bytes[p:]) |
||||
} |
||||
if size != hangulUTF8Size { |
||||
return 0 |
||||
} |
||||
return r |
||||
} |
@ -0,0 +1,458 @@ |
||||
// Copyright 2011 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package norm |
||||
|
||||
import ( |
||||
"fmt" |
||||
"unicode/utf8" |
||||
) |
||||
|
||||
// MaxSegmentSize is the maximum size of a byte buffer needed to consider any
|
||||
// sequence of starter and non-starter runes for the purpose of normalization.
|
||||
const MaxSegmentSize = maxByteBufferSize |
||||
|
||||
// An Iter iterates over a string or byte slice, while normalizing it
|
||||
// to a given Form.
|
||||
type Iter struct { |
||||
rb reorderBuffer |
||||
buf [maxByteBufferSize]byte |
||||
info Properties // first character saved from previous iteration
|
||||
next iterFunc // implementation of next depends on form
|
||||
asciiF iterFunc |
||||
|
||||
p int // current position in input source
|
||||
multiSeg []byte // remainder of multi-segment decomposition
|
||||
} |
||||
|
||||
type iterFunc func(*Iter) []byte |
||||
|
||||
// Init initializes i to iterate over src after normalizing it to Form f.
|
||||
func (i *Iter) Init(f Form, src []byte) { |
||||
i.p = 0 |
||||
if len(src) == 0 { |
||||
i.setDone() |
||||
i.rb.nsrc = 0 |
||||
return |
||||
} |
||||
i.multiSeg = nil |
||||
i.rb.init(f, src) |
||||
i.next = i.rb.f.nextMain |
||||
i.asciiF = nextASCIIBytes |
||||
i.info = i.rb.f.info(i.rb.src, i.p) |
||||
i.rb.ss.first(i.info) |
||||
} |
||||
|
||||
// InitString initializes i to iterate over src after normalizing it to Form f.
|
||||
func (i *Iter) InitString(f Form, src string) { |
||||
i.p = 0 |
||||
if len(src) == 0 { |
||||
i.setDone() |
||||
i.rb.nsrc = 0 |
||||
return |
||||
} |
||||
i.multiSeg = nil |
||||
i.rb.initString(f, src) |
||||
i.next = i.rb.f.nextMain |
||||
i.asciiF = nextASCIIString |
||||
i.info = i.rb.f.info(i.rb.src, i.p) |
||||
i.rb.ss.first(i.info) |
||||
} |
||||
|
||||
// Seek sets the segment to be returned by the next call to Next to start
|
||||
// at position p. It is the responsibility of the caller to set p to the
|
||||
// start of a segment.
|
||||
func (i *Iter) Seek(offset int64, whence int) (int64, error) { |
||||
var abs int64 |
||||
switch whence { |
||||
case 0: |
||||
abs = offset |
||||
case 1: |
||||
abs = int64(i.p) + offset |
||||
case 2: |
||||
abs = int64(i.rb.nsrc) + offset |
||||
default: |
||||
return 0, fmt.Errorf("norm: invalid whence") |
||||
} |
||||
if abs < 0 { |
||||
return 0, fmt.Errorf("norm: negative position") |
||||
} |
||||
if int(abs) >= i.rb.nsrc { |
||||
i.setDone() |
||||
return int64(i.p), nil |
||||
} |
||||
i.p = int(abs) |
||||
i.multiSeg = nil |
||||
i.next = i.rb.f.nextMain |
||||
i.info = i.rb.f.info(i.rb.src, i.p) |
||||
i.rb.ss.first(i.info) |
||||
return abs, nil |
||||
} |
||||
|
||||
// returnSlice returns a slice of the underlying input type as a byte slice.
|
||||
// If the underlying is of type []byte, it will simply return a slice.
|
||||
// If the underlying is of type string, it will copy the slice to the buffer
|
||||
// and return that.
|
||||
func (i *Iter) returnSlice(a, b int) []byte { |
||||
if i.rb.src.bytes == nil { |
||||
return i.buf[:copy(i.buf[:], i.rb.src.str[a:b])] |
||||
} |
||||
return i.rb.src.bytes[a:b] |
||||
} |
||||
|
||||
// Pos returns the byte position at which the next call to Next will commence processing.
|
||||
func (i *Iter) Pos() int { |
||||
return i.p |
||||
} |
||||
|
||||
func (i *Iter) setDone() { |
||||
i.next = nextDone |
||||
i.p = i.rb.nsrc |
||||
} |
||||
|
||||
// Done returns true if there is no more input to process.
|
||||
func (i *Iter) Done() bool { |
||||
return i.p >= i.rb.nsrc |
||||
} |
||||
|
||||
// Next returns f(i.input[i.Pos():n]), where n is a boundary of i.input.
|
||||
// For any input a and b for which f(a) == f(b), subsequent calls
|
||||
// to Next will return the same segments.
|
||||
// Modifying runes are grouped together with the preceding starter, if such a starter exists.
|
||||
// Although not guaranteed, n will typically be the smallest possible n.
|
||||
func (i *Iter) Next() []byte { |
||||
return i.next(i) |
||||
} |
||||
|
||||
func nextASCIIBytes(i *Iter) []byte { |
||||
p := i.p + 1 |
||||
if p >= i.rb.nsrc { |
||||
p0 := i.p |
||||
i.setDone() |
||||
return i.rb.src.bytes[p0:p] |
||||
} |
||||
if i.rb.src.bytes[p] < utf8.RuneSelf { |
||||
p0 := i.p |
||||
i.p = p |
||||
return i.rb.src.bytes[p0:p] |
||||
} |
||||
i.info = i.rb.f.info(i.rb.src, i.p) |
||||
i.next = i.rb.f.nextMain |
||||
return i.next(i) |
||||
} |
||||
|
||||
func nextASCIIString(i *Iter) []byte { |
||||
p := i.p + 1 |
||||
if p >= i.rb.nsrc { |
||||
i.buf[0] = i.rb.src.str[i.p] |
||||
i.setDone() |
||||
return i.buf[:1] |
||||
} |
||||
if i.rb.src.str[p] < utf8.RuneSelf { |
||||
i.buf[0] = i.rb.src.str[i.p] |
||||
i.p = p |
||||
return i.buf[:1] |
||||
} |
||||
i.info = i.rb.f.info(i.rb.src, i.p) |
||||
i.next = i.rb.f.nextMain |
||||
return i.next(i) |
||||
} |
||||
|
||||
func nextHangul(i *Iter) []byte { |
||||
p := i.p |
||||
next := p + hangulUTF8Size |
||||
if next >= i.rb.nsrc { |
||||
i.setDone() |
||||
} else if i.rb.src.hangul(next) == 0 { |
||||
i.rb.ss.next(i.info) |
||||
i.info = i.rb.f.info(i.rb.src, i.p) |
||||
i.next = i.rb.f.nextMain |
||||
return i.next(i) |
||||
} |
||||
i.p = next |
||||
return i.buf[:decomposeHangul(i.buf[:], i.rb.src.hangul(p))] |
||||
} |
||||
|
||||
func nextDone(i *Iter) []byte { |
||||
return nil |
||||
} |
||||
|
||||
// nextMulti is used for iterating over multi-segment decompositions
|
||||
// for decomposing normal forms.
|
||||
func nextMulti(i *Iter) []byte { |
||||
j := 0 |
||||
d := i.multiSeg |
||||
// skip first rune
|
||||
for j = 1; j < len(d) && !utf8.RuneStart(d[j]); j++ { |
||||
} |
||||
for j < len(d) { |
||||
info := i.rb.f.info(input{bytes: d}, j) |
||||
if info.BoundaryBefore() { |
||||
i.multiSeg = d[j:] |
||||
return d[:j] |
||||
} |
||||
j += int(info.size) |
||||
} |
||||
// treat last segment as normal decomposition
|
||||
i.next = i.rb.f.nextMain |
||||
return i.next(i) |
||||
} |
||||
|
||||
// nextMultiNorm is used for iterating over multi-segment decompositions
|
||||
// for composing normal forms.
|
||||
func nextMultiNorm(i *Iter) []byte { |
||||
j := 0 |
||||
d := i.multiSeg |
||||
for j < len(d) { |
||||
info := i.rb.f.info(input{bytes: d}, j) |
||||
if info.BoundaryBefore() { |
||||
i.rb.compose() |
||||
seg := i.buf[:i.rb.flushCopy(i.buf[:])] |
||||
i.rb.insertUnsafe(input{bytes: d}, j, info) |
||||
i.multiSeg = d[j+int(info.size):] |
||||
return seg |
||||
} |
||||
i.rb.insertUnsafe(input{bytes: d}, j, info) |
||||
j += int(info.size) |
||||
} |
||||
i.multiSeg = nil |
||||
i.next = nextComposed |
||||
return doNormComposed(i) |
||||
} |
||||
|
||||
// nextDecomposed is the implementation of Next for forms NFD and NFKD.
|
||||
func nextDecomposed(i *Iter) (next []byte) { |
||||
outp := 0 |
||||
inCopyStart, outCopyStart := i.p, 0 |
||||
for { |
||||
if sz := int(i.info.size); sz <= 1 { |
||||
i.rb.ss = 0 |
||||
p := i.p |
||||
i.p++ // ASCII or illegal byte. Either way, advance by 1.
|
||||
if i.p >= i.rb.nsrc { |
||||
i.setDone() |
||||
return i.returnSlice(p, i.p) |
||||
} else if i.rb.src._byte(i.p) < utf8.RuneSelf { |
||||
i.next = i.asciiF |
||||
return i.returnSlice(p, i.p) |
||||
} |
||||
outp++ |
||||
} else if d := i.info.Decomposition(); d != nil { |
||||
// Note: If leading CCC != 0, then len(d) == 2 and last is also non-zero.
|
||||
// Case 1: there is a leftover to copy. In this case the decomposition
|
||||
// must begin with a modifier and should always be appended.
|
||||
// Case 2: no leftover. Simply return d if followed by a ccc == 0 value.
|
||||
p := outp + len(d) |
||||
if outp > 0 { |
||||
i.rb.src.copySlice(i.buf[outCopyStart:], inCopyStart, i.p) |
||||
// TODO: this condition should not be possible, but we leave it
|
||||
// in for defensive purposes.
|
||||
if p > len(i.buf) { |
||||
return i.buf[:outp] |
||||
} |
||||
} else if i.info.multiSegment() { |
||||
// outp must be 0 as multi-segment decompositions always
|
||||
// start a new segment.
|
||||
if i.multiSeg == nil { |
||||
i.multiSeg = d |
||||
i.next = nextMulti |
||||
return nextMulti(i) |
||||
} |
||||
// We are in the last segment. Treat as normal decomposition.
|
||||
d = i.multiSeg |
||||
i.multiSeg = nil |
||||
p = len(d) |
||||
} |
||||
prevCC := i.info.tccc |
||||
if i.p += sz; i.p >= i.rb.nsrc { |
||||
i.setDone() |
||||
i.info = Properties{} // Force BoundaryBefore to succeed.
|
||||
} else { |
||||
i.info = i.rb.f.info(i.rb.src, i.p) |
||||
} |
||||
switch i.rb.ss.next(i.info) { |
||||
case ssOverflow: |
||||
i.next = nextCGJDecompose |
||||
fallthrough |
||||
case ssStarter: |
||||
if outp > 0 { |
||||
copy(i.buf[outp:], d) |
||||
return i.buf[:p] |
||||
} |
||||
return d |
||||
} |
||||
copy(i.buf[outp:], d) |
||||
outp = p |
||||
inCopyStart, outCopyStart = i.p, outp |
||||
if i.info.ccc < prevCC { |
||||
goto doNorm |
||||
} |
||||
continue |
||||
} else if r := i.rb.src.hangul(i.p); r != 0 { |
||||
outp = decomposeHangul(i.buf[:], r) |
||||
i.p += hangulUTF8Size |
||||
inCopyStart, outCopyStart = i.p, outp |
||||
if i.p >= i.rb.nsrc { |
||||
i.setDone() |
||||
break |
||||
} else if i.rb.src.hangul(i.p) != 0 { |
||||
i.next = nextHangul |
||||
return i.buf[:outp] |
||||
} |
||||
} else { |
||||
p := outp + sz |
||||
if p > len(i.buf) { |
||||
break |
||||
} |
||||
outp = p |
||||
i.p += sz |
||||
} |
||||
if i.p >= i.rb.nsrc { |
||||
i.setDone() |
||||
break |
||||
} |
||||
prevCC := i.info.tccc |
||||
i.info = i.rb.f.info(i.rb.src, i.p) |
||||
if v := i.rb.ss.next(i.info); v == ssStarter { |
||||
break |
||||
} else if v == ssOverflow { |
||||
i.next = nextCGJDecompose |
||||
break |
||||
} |
||||
if i.info.ccc < prevCC { |
||||
goto doNorm |
||||
} |
||||
} |
||||
if outCopyStart == 0 { |
||||
return i.returnSlice(inCopyStart, i.p) |
||||
} else if inCopyStart < i.p { |
||||
i.rb.src.copySlice(i.buf[outCopyStart:], inCopyStart, i.p) |
||||
} |
||||
return i.buf[:outp] |
||||
doNorm: |
||||
// Insert what we have decomposed so far in the reorderBuffer.
|
||||
// As we will only reorder, there will always be enough room.
|
||||
i.rb.src.copySlice(i.buf[outCopyStart:], inCopyStart, i.p) |
||||
i.rb.insertDecomposed(i.buf[0:outp]) |
||||
return doNormDecomposed(i) |
||||
} |
||||
|
||||
func doNormDecomposed(i *Iter) []byte { |
||||
for { |
||||
i.rb.insertUnsafe(i.rb.src, i.p, i.info) |
||||
if i.p += int(i.info.size); i.p >= i.rb.nsrc { |
||||
i.setDone() |
||||
break |
||||
} |
||||
i.info = i.rb.f.info(i.rb.src, i.p) |
||||
if i.info.ccc == 0 { |
||||
break |
||||
} |
||||
if s := i.rb.ss.next(i.info); s == ssOverflow { |
||||
i.next = nextCGJDecompose |
||||
break |
||||
} |
||||
} |
||||
// new segment or too many combining characters: exit normalization
|
||||
return i.buf[:i.rb.flushCopy(i.buf[:])] |
||||
} |
||||
|
||||
func nextCGJDecompose(i *Iter) []byte { |
||||
i.rb.ss = 0 |
||||
i.rb.insertCGJ() |
||||
i.next = nextDecomposed |
||||
i.rb.ss.first(i.info) |
||||
buf := doNormDecomposed(i) |
||||
return buf |
||||
} |
||||
|
||||
// nextComposed is the implementation of Next for forms NFC and NFKC.
|
||||
func nextComposed(i *Iter) []byte { |
||||
outp, startp := 0, i.p |
||||
var prevCC uint8 |
||||
for { |
||||
if !i.info.isYesC() { |
||||
goto doNorm |
||||
} |
||||
prevCC = i.info.tccc |
||||
sz := int(i.info.size) |
||||
if sz == 0 { |
||||
sz = 1 // illegal rune: copy byte-by-byte
|
||||
} |
||||
p := outp + sz |
||||
if p > len(i.buf) { |
||||
break |
||||
} |
||||
outp = p |
||||
i.p += sz |
||||
if i.p >= i.rb.nsrc { |
||||
i.setDone() |
||||
break |
||||
} else if i.rb.src._byte(i.p) < utf8.RuneSelf { |
||||
i.rb.ss = 0 |
||||
i.next = i.asciiF |
||||
break |
||||
} |
||||
i.info = i.rb.f.info(i.rb.src, i.p) |
||||
if v := i.rb.ss.next(i.info); v == ssStarter { |
||||
break |
||||
} else if v == ssOverflow { |
||||
i.next = nextCGJCompose |
||||
break |
||||
} |
||||
if i.info.ccc < prevCC { |
||||
goto doNorm |
||||
} |
||||
} |
||||
return i.returnSlice(startp, i.p) |
||||
doNorm: |
||||
// reset to start position
|
||||
i.p = startp |
||||
i.info = i.rb.f.info(i.rb.src, i.p) |
||||
i.rb.ss.first(i.info) |
||||
if i.info.multiSegment() { |
||||
d := i.info.Decomposition() |
||||
info := i.rb.f.info(input{bytes: d}, 0) |
||||
i.rb.insertUnsafe(input{bytes: d}, 0, info) |
||||
i.multiSeg = d[int(info.size):] |
||||
i.next = nextMultiNorm |
||||
return nextMultiNorm(i) |
||||
} |
||||
i.rb.ss.first(i.info) |
||||
i.rb.insertUnsafe(i.rb.src, i.p, i.info) |
||||
return doNormComposed(i) |
||||
} |
||||
|
||||
func doNormComposed(i *Iter) []byte { |
||||
// First rune should already be inserted.
|
||||
for { |
||||
if i.p += int(i.info.size); i.p >= i.rb.nsrc { |
||||
i.setDone() |
||||
break |
||||
} |
||||
i.info = i.rb.f.info(i.rb.src, i.p) |
||||
if s := i.rb.ss.next(i.info); s == ssStarter { |
||||
break |
||||
} else if s == ssOverflow { |
||||
i.next = nextCGJCompose |
||||
break |
||||
} |
||||
i.rb.insertUnsafe(i.rb.src, i.p, i.info) |
||||
} |
||||
i.rb.compose() |
||||
seg := i.buf[:i.rb.flushCopy(i.buf[:])] |
||||
return seg |
||||
} |
||||
|
||||
func nextCGJCompose(i *Iter) []byte { |
||||
i.rb.ss = 0 // instead of first
|
||||
i.rb.insertCGJ() |
||||
i.next = nextComposed |
||||
// Note that we treat any rune with nLeadingNonStarters > 0 as a non-starter,
|
||||
// even if they are not. This is particularly dubious for U+FF9E and UFF9A.
|
||||
// If we ever change that, insert a check here.
|
||||
i.rb.ss.first(i.info) |
||||
i.rb.insertUnsafe(i.rb.src, i.p, i.info) |
||||
return doNormComposed(i) |
||||
} |
@ -0,0 +1,986 @@ |
||||
// Copyright 2011 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build ignore
|
||||
|
||||
// Normalization table generator.
|
||||
// Data read from the web.
|
||||
// See forminfo.go for a description of the trie values associated with each rune.
|
||||
|
||||
package main |
||||
|
||||
import ( |
||||
"bytes" |
||||
"encoding/binary" |
||||
"flag" |
||||
"fmt" |
||||
"io" |
||||
"log" |
||||
"sort" |
||||
"strconv" |
||||
"strings" |
||||
|
||||
"golang.org/x/text/internal/gen" |
||||
"golang.org/x/text/internal/triegen" |
||||
"golang.org/x/text/internal/ucd" |
||||
) |
||||
|
||||
func main() { |
||||
gen.Init() |
||||
loadUnicodeData() |
||||
compactCCC() |
||||
loadCompositionExclusions() |
||||
completeCharFields(FCanonical) |
||||
completeCharFields(FCompatibility) |
||||
computeNonStarterCounts() |
||||
verifyComputed() |
||||
printChars() |
||||
testDerived() |
||||
printTestdata() |
||||
makeTables() |
||||
} |
||||
|
||||
// Command-line flags of the generator.
var (
	// tablelist selects which tables are generated.
	tablelist = flag.String("tables", "all",
		"comma-separated list of which tables to generate; "+
			"can be 'decomp', 'recomp', 'info' and 'all'")

	// test switches the generator to checking existing tables and writing
	// regression test data.
	test = flag.Bool("test", false,
		"test existing tables against DerivedNormalizationProps and generate test data for regression testing")

	// verbose echoes the parsed data to stdout.
	verbose = flag.Bool("verbose", false,
		"write data to stdout as it is parsed")
)
||||
|
||||
// MaxChar is the highest code point handled; anything above this shouldn't
// exist.
const MaxChar = 0x10FFFF
|
||||
|
||||
// QCResult is a Quick Check property value. Quick Check properties of runes
// allow us to quickly determine whether a rune may occur in a normal form.
// For a given normal form, a rune may be guaranteed to occur
// verbatim (QC=Yes), may or may not combine with another
// rune (QC=Maybe), or may not occur (QC=No).
type QCResult int

// Possible Quick Check results; the zero value is QCUnknown.
const (
	QCUnknown QCResult = iota
	QCYes
	QCNo
	QCMaybe
)

// String returns "Yes", "No" or "Maybe" for the corresponding result and
// "***UNKNOWN***" for any other value, including QCUnknown.
func (r QCResult) String() string {
	names := [...]string{QCYes: "Yes", QCNo: "No", QCMaybe: "Maybe"}
	if r > QCUnknown && int(r) < len(names) {
		return names[r]
	}
	return "***UNKNOWN***"
}
||||
|
||||
// Form types; used as the index into Char.forms.
const (
	FCanonical         = iota // NFC or NFD
	FCompatibility            // NFKC or NFKD
	FNumberOfFormTypes        // number of form types (array length)
)

// Modes; used as the index into FormInfo.quickCheck and FormInfo.verified.
const (
	MComposed      = iota // NFC or NFKC
	MDecomposed           // NFD or NFKD
	MNumberOfModes        // number of modes (array length)
)
||||
|
||||
// This contains only the properties we're interested in.
|
||||
type Char struct { |
||||
name string |
||||
codePoint rune // if zero, this index is not a valid code point.
|
||||
ccc uint8 // canonical combining class
|
||||
origCCC uint8 |
||||
excludeInComp bool // from CompositionExclusions.txt
|
||||
compatDecomp bool // it has a compatibility expansion
|
||||
|
||||
nTrailingNonStarters uint8 |
||||
nLeadingNonStarters uint8 // must be equal to trailing if non-zero
|
||||
|
||||
forms [FNumberOfFormTypes]FormInfo // For FCanonical and FCompatibility
|
||||
|
||||
state State |
||||
} |
||||
|
||||
var chars = make([]Char, MaxChar+1) |
||||
var cccMap = make(map[uint8]uint8) |
||||
|
||||
func (c Char) String() string { |
||||
buf := new(bytes.Buffer) |
||||
|
||||
fmt.Fprintf(buf, "%U [%s]:\n", c.codePoint, c.name) |
||||
fmt.Fprintf(buf, " ccc: %v\n", c.ccc) |
||||
fmt.Fprintf(buf, " excludeInComp: %v\n", c.excludeInComp) |
||||
fmt.Fprintf(buf, " compatDecomp: %v\n", c.compatDecomp) |
||||
fmt.Fprintf(buf, " state: %v\n", c.state) |
||||
fmt.Fprintf(buf, " NFC:\n") |
||||
fmt.Fprint(buf, c.forms[FCanonical]) |
||||
fmt.Fprintf(buf, " NFKC:\n") |
||||
fmt.Fprint(buf, c.forms[FCompatibility]) |
||||
|
||||
return buf.String() |
||||
} |
||||
|
||||
// State records how a Char entry was obtained.
//
// In UnicodeData.txt, some ranges are marked like this:
//
//	3400;<CJK Ideograph Extension A, First>;Lo;0;L;;;;;N;;;;;
//	4DB5;<CJK Ideograph Extension A, Last>;Lo;0;L;;;;;N;;;;;
//
// parseCharacter keeps a state variable indicating the weirdness.
type State int

const (
	SNormal State = iota // known to be zero for the type
	SFirst
	SLast
	SMissing
)
||||
|
||||
// lastChar starts out as U+0000. NOTE(review): it is not referenced in this
// part of the file; confirm whether it is still needed.
var lastChar = rune('\u0000')
||||
|
||||
func (c Char) isValid() bool { |
||||
return c.codePoint != 0 && c.state != SMissing |
||||
} |
||||
|
||||
type FormInfo struct { |
||||
quickCheck [MNumberOfModes]QCResult // index: MComposed or MDecomposed
|
||||
verified [MNumberOfModes]bool // index: MComposed or MDecomposed
|
||||
|
||||
combinesForward bool // May combine with rune on the right
|
||||
combinesBackward bool // May combine with rune on the left
|
||||
isOneWay bool // Never appears in result
|
||||
inDecomp bool // Some decompositions result in this char.
|
||||
decomp Decomposition |
||||
expandedDecomp Decomposition |
||||
} |
||||
|
||||
func (f FormInfo) String() string { |
||||
buf := bytes.NewBuffer(make([]byte, 0)) |
||||
|
||||
fmt.Fprintf(buf, " quickCheck[C]: %v\n", f.quickCheck[MComposed]) |
||||
fmt.Fprintf(buf, " quickCheck[D]: %v\n", f.quickCheck[MDecomposed]) |
||||
fmt.Fprintf(buf, " cmbForward: %v\n", f.combinesForward) |
||||
fmt.Fprintf(buf, " cmbBackward: %v\n", f.combinesBackward) |
||||
fmt.Fprintf(buf, " isOneWay: %v\n", f.isOneWay) |
||||
fmt.Fprintf(buf, " inDecomp: %v\n", f.inDecomp) |
||||
fmt.Fprintf(buf, " decomposition: %X\n", f.decomp) |
||||
fmt.Fprintf(buf, " expandedDecomp: %X\n", f.expandedDecomp) |
||||
|
||||
return buf.String() |
||||
} |
||||
|
||||
type Decomposition []rune |
||||
|
||||
func parseDecomposition(s string, skipfirst bool) (a []rune, err error) { |
||||
decomp := strings.Split(s, " ") |
||||
if len(decomp) > 0 && skipfirst { |
||||
decomp = decomp[1:] |
||||
} |
||||
for _, d := range decomp { |
||||
point, err := strconv.ParseUint(d, 16, 64) |
||||
if err != nil { |
||||
return a, err |
||||
} |
||||
a = append(a, rune(point)) |
||||
} |
||||
return a, nil |
||||
} |
||||
|
||||
func loadUnicodeData() { |
||||
f := gen.OpenUCDFile("UnicodeData.txt") |
||||
defer f.Close() |
||||
p := ucd.New(f) |
||||
for p.Next() { |
||||
r := p.Rune(ucd.CodePoint) |
||||
char := &chars[r] |
||||
|
||||
char.ccc = uint8(p.Uint(ucd.CanonicalCombiningClass)) |
||||
decmap := p.String(ucd.DecompMapping) |
||||
|
||||
exp, err := parseDecomposition(decmap, false) |
||||
isCompat := false |
||||
if err != nil { |
||||
if len(decmap) > 0 { |
||||
exp, err = parseDecomposition(decmap, true) |
||||
if err != nil { |
||||
log.Fatalf(`%U: bad decomp |%v|: "%s"`, r, decmap, err) |
||||
} |
||||
isCompat = true |
||||
} |
||||
} |
||||
|
||||
char.name = p.String(ucd.Name) |
||||
char.codePoint = r |
||||
char.forms[FCompatibility].decomp = exp |
||||
if !isCompat { |
||||
char.forms[FCanonical].decomp = exp |
||||
} else { |
||||
char.compatDecomp = true |
||||
} |
||||
if len(decmap) > 0 { |
||||
char.forms[FCompatibility].decomp = exp |
||||
} |
||||
} |
||||
if err := p.Err(); err != nil { |
||||
log.Fatal(err) |
||||
} |
||||
} |
||||
|
||||
// compactCCC converts the sparse set of CCC values to a continguous one,
|
||||
// reducing the number of bits needed from 8 to 6.
|
||||
func compactCCC() { |
||||
m := make(map[uint8]uint8) |
||||
for i := range chars { |
||||
c := &chars[i] |
||||
m[c.ccc] = 0 |
||||
} |
||||
cccs := []int{} |
||||
for v, _ := range m { |
||||
cccs = append(cccs, int(v)) |
||||
} |
||||
sort.Ints(cccs) |
||||
for i, c := range cccs { |
||||
cccMap[uint8(i)] = uint8(c) |
||||
m[uint8(c)] = uint8(i) |
||||
} |
||||
for i := range chars { |
||||
c := &chars[i] |
||||
c.origCCC = c.ccc |
||||
c.ccc = m[c.ccc] |
||||
} |
||||
if len(m) >= 1<<6 { |
||||
log.Fatalf("too many difference CCC values: %d >= 64", len(m)) |
||||
} |
||||
} |
||||
|
||||
// CompositionExclusions.txt has form:
|
||||
// 0958 # ...
|
||||
// See https://unicode.org/reports/tr44/ for full explanation
|
||||
func loadCompositionExclusions() { |
||||
f := gen.OpenUCDFile("CompositionExclusions.txt") |
||||
defer f.Close() |
||||
p := ucd.New(f) |
||||
for p.Next() { |
||||
c := &chars[p.Rune(0)] |
||||
if c.excludeInComp { |
||||
log.Fatalf("%U: Duplicate entry in exclusions.", c.codePoint) |
||||
} |
||||
c.excludeInComp = true |
||||
} |
||||
if e := p.Err(); e != nil { |
||||
log.Fatal(e) |
||||
} |
||||
} |
||||
|
||||
// hasCompatDecomp returns true if any of the recursive
|
||||
// decompositions contains a compatibility expansion.
|
||||
// In this case, the character may not occur in NFK*.
|
||||
func hasCompatDecomp(r rune) bool { |
||||
c := &chars[r] |
||||
if c.compatDecomp { |
||||
return true |
||||
} |
||||
for _, d := range c.forms[FCompatibility].decomp { |
||||
if hasCompatDecomp(d) { |
||||
return true |
||||
} |
||||
} |
||||
return false |
||||
} |
||||
|
||||
// Hangul related constants.
const (
	HangulBase = 0xAC00
	HangulEnd  = 0xD7A4 // hangulBase + Jamo combinations (19 * 21 * 28)

	JamoLBase = 0x1100
	JamoLEnd  = 0x1113
	JamoVBase = 0x1161
	JamoVEnd  = 0x1176
	JamoTBase = 0x11A8
	JamoTEnd  = 0x11C3

	JamoLVTCount = 19 * 21 * 28
	JamoTCount   = 28
)

// isHangul reports whether r lies in the half-open range [HangulBase, HangulEnd).
func isHangul(r rune) bool {
	return HangulBase <= r && r < HangulEnd
}

// isHangulWithoutJamoT reports whether r is a Hangul rune whose offset from
// HangulBase is a multiple of JamoTCount.
func isHangulWithoutJamoT(r rune) bool {
	if !isHangul(r) {
		return false
	}
	// isHangul already bounds the offset below JamoLVTCount
	// (HangulEnd - HangulBase == JamoLVTCount), so only the remainder matters.
	return (r-HangulBase)%JamoTCount == 0
}
||||
|
||||
func ccc(r rune) uint8 { |
||||
return chars[r].ccc |
||||
} |
||||
|
||||
// Insert a rune in a buffer, ordered by Canonical Combining Class.
|
||||
func insertOrdered(b Decomposition, r rune) Decomposition { |
||||
n := len(b) |
||||
b = append(b, 0) |
||||
cc := ccc(r) |
||||
if cc > 0 { |
||||
// Use bubble sort.
|
||||
for ; n > 0; n-- { |
||||
if ccc(b[n-1]) <= cc { |
||||
break |
||||
} |
||||
b[n] = b[n-1] |
||||
} |
||||
} |
||||
b[n] = r |
||||
return b |
||||
} |
||||
|
||||
// Recursively decompose.
|
||||
func decomposeRecursive(form int, r rune, d Decomposition) Decomposition { |
||||
dcomp := chars[r].forms[form].decomp |
||||
if len(dcomp) == 0 { |
||||
return insertOrdered(d, r) |
||||
} |
||||
for _, c := range dcomp { |
||||
d = decomposeRecursive(form, c, d) |
||||
} |
||||
return d |
||||
} |
||||
|
||||
func completeCharFields(form int) { |
||||
// Phase 0: pre-expand decomposition.
|
||||
for i := range chars { |
||||
f := &chars[i].forms[form] |
||||
if len(f.decomp) == 0 { |
||||
continue |
||||
} |
||||
exp := make(Decomposition, 0) |
||||
for _, c := range f.decomp { |
||||
exp = decomposeRecursive(form, c, exp) |
||||
} |
||||
f.expandedDecomp = exp |
||||
} |
||||
|
||||
// Phase 1: composition exclusion, mark decomposition.
|
||||
for i := range chars { |
||||
c := &chars[i] |
||||
f := &c.forms[form] |
||||
|
||||
// Marks script-specific exclusions and version restricted.
|
||||
f.isOneWay = c.excludeInComp |
||||
|
||||
// Singletons
|
||||
f.isOneWay = f.isOneWay || len(f.decomp) == 1 |
||||
|
||||
// Non-starter decompositions
|
||||
if len(f.decomp) > 1 { |
||||
chk := c.ccc != 0 || chars[f.decomp[0]].ccc != 0 |
||||
f.isOneWay = f.isOneWay || chk |
||||
} |
||||
|
||||
// Runes that decompose into more than two runes.
|
||||
f.isOneWay = f.isOneWay || len(f.decomp) > 2 |
||||
|
||||
if form == FCompatibility { |
||||
f.isOneWay = f.isOneWay || hasCompatDecomp(c.codePoint) |
||||
} |
||||
|
||||
for _, r := range f.decomp { |
||||
chars[r].forms[form].inDecomp = true |
||||
} |
||||
} |
||||
|
||||
// Phase 2: forward and backward combining.
|
||||
for i := range chars { |
||||
c := &chars[i] |
||||
f := &c.forms[form] |
||||
|
||||
if !f.isOneWay && len(f.decomp) == 2 { |
||||
f0 := &chars[f.decomp[0]].forms[form] |
||||
f1 := &chars[f.decomp[1]].forms[form] |
||||
if !f0.isOneWay { |
||||
f0.combinesForward = true |
||||
} |
||||
if !f1.isOneWay { |
||||
f1.combinesBackward = true |
||||
} |
||||
} |
||||
if isHangulWithoutJamoT(rune(i)) { |
||||
f.combinesForward = true |
||||
} |
||||
} |
||||
|
||||
// Phase 3: quick check values.
|
||||
for i := range chars { |
||||
c := &chars[i] |
||||
f := &c.forms[form] |
||||
|
||||
switch { |
||||
case len(f.decomp) > 0: |
||||
f.quickCheck[MDecomposed] = QCNo |
||||
case isHangul(rune(i)): |
||||
f.quickCheck[MDecomposed] = QCNo |
||||
default: |
||||
f.quickCheck[MDecomposed] = QCYes |
||||
} |
||||
switch { |
||||
case f.isOneWay: |
||||
f.quickCheck[MComposed] = QCNo |
||||
case (i & 0xffff00) == JamoLBase: |
||||
f.quickCheck[MComposed] = QCYes |
||||
if JamoLBase <= i && i < JamoLEnd { |
||||
f.combinesForward = true |
||||
} |
||||
if JamoVBase <= i && i < JamoVEnd { |
||||
f.quickCheck[MComposed] = QCMaybe |
||||
f.combinesBackward = true |
||||
f.combinesForward = true |
||||
} |
||||
if JamoTBase <= i && i < JamoTEnd { |
||||
f.quickCheck[MComposed] = QCMaybe |
||||
f.combinesBackward = true |
||||
} |
||||
case !f.combinesBackward: |
||||
f.quickCheck[MComposed] = QCYes |
||||
default: |
||||
f.quickCheck[MComposed] = QCMaybe |
||||
} |
||||
} |
||||
} |
||||
|
||||
func computeNonStarterCounts() { |
||||
// Phase 4: leading and trailing non-starter count
|
||||
for i := range chars { |
||||
c := &chars[i] |
||||
|
||||
runes := []rune{rune(i)} |
||||
// We always use FCompatibility so that the CGJ insertion points do not
|
||||
// change for repeated normalizations with different forms.
|
||||
if exp := c.forms[FCompatibility].expandedDecomp; len(exp) > 0 { |
||||
runes = exp |
||||
} |
||||
// We consider runes that combine backwards to be non-starters for the
|
||||
// purpose of Stream-Safe Text Processing.
|
||||
for _, r := range runes { |
||||
if cr := &chars[r]; cr.ccc == 0 && !cr.forms[FCompatibility].combinesBackward { |
||||
break |
||||
} |
||||
c.nLeadingNonStarters++ |
||||
} |
||||
for i := len(runes) - 1; i >= 0; i-- { |
||||
if cr := &chars[runes[i]]; cr.ccc == 0 && !cr.forms[FCompatibility].combinesBackward { |
||||
break |
||||
} |
||||
c.nTrailingNonStarters++ |
||||
} |
||||
if c.nTrailingNonStarters > 3 { |
||||
log.Fatalf("%U: Decomposition with more than 3 (%d) trailing modifiers (%U)", i, c.nTrailingNonStarters, runes) |
||||
} |
||||
|
||||
if isHangul(rune(i)) { |
||||
c.nTrailingNonStarters = 2 |
||||
if isHangulWithoutJamoT(rune(i)) { |
||||
c.nTrailingNonStarters = 1 |
||||
} |
||||
} |
||||
|
||||
if l, t := c.nLeadingNonStarters, c.nTrailingNonStarters; l > 0 && l != t { |
||||
log.Fatalf("%U: number of leading and trailing non-starters should be equal (%d vs %d)", i, l, t) |
||||
} |
||||
if t := c.nTrailingNonStarters; t > 3 { |
||||
log.Fatalf("%U: number of trailing non-starters is %d > 3", t) |
||||
} |
||||
} |
||||
} |
||||
|
||||
// printBytes writes b to w as the Go source of a byte array variable called
// name, preceded by a comment giving its size. Values are written eight per
// line, and every 64 bytes a "// Bytes x - y" comment marks the offset range.
func printBytes(w io.Writer, b []byte, name string) {
	fmt.Fprintf(w, "// %s: %d bytes\n", name, len(b))
	fmt.Fprintf(w, "var %s = [...]byte {", name)
	for i, c := range b {
		switch {
		case i%64 == 0:
			fmt.Fprintf(w, "\n// Bytes %x - %x\n", i, i+63)
		case i%8 == 0:
			fmt.Fprintf(w, "\n")
		}
		fmt.Fprintf(w, "0x%.2X, ", c)
	}
	fmt.Fprint(w, "\n}\n\n")
}
||||
|
||||
// See forminfo.go for format.
|
||||
func makeEntry(f *FormInfo, c *Char) uint16 { |
||||
e := uint16(0) |
||||
if r := c.codePoint; HangulBase <= r && r < HangulEnd { |
||||
e |= 0x40 |
||||
} |
||||
if f.combinesForward { |
||||
e |= 0x20 |
||||
} |
||||
if f.quickCheck[MDecomposed] == QCNo { |
||||
e |= 0x4 |
||||
} |
||||
switch f.quickCheck[MComposed] { |
||||
case QCYes: |
||||
case QCNo: |
||||
e |= 0x10 |
||||
case QCMaybe: |
||||
e |= 0x18 |
||||
default: |
||||
log.Fatalf("Illegal quickcheck value %v.", f.quickCheck[MComposed]) |
||||
} |
||||
e |= uint16(c.nTrailingNonStarters) |
||||
return e |
||||
} |
||||
|
||||
// decompSet keeps track of unique decompositions, grouped by whether
// the decomposition is followed by a trailing and/or leading CCC.
// It has one set per group, indexed by the constants below.
type decompSet [7]map[string]bool

// Group indices into a decompSet.
const (
	normalDecomp = iota
	firstMulti
	firstCCC
	endMulti
	firstLeadingCCC
	firstCCCZeroExcept
	firstStarterWithNLead
	lastDecomp
)

// cname[i] is the name of the constant that printCharInfoTables emits after
// writing group i; its value is the offset at which group i ends.
var cname = []string{"firstMulti", "firstCCC", "endMulti", "firstLeadingCCC", "firstCCCZeroExcept", "firstStarterWithNLead", "lastDecomp"}

// makeDecompSet returns a decompSet in which every group is an empty,
// non-nil set.
func makeDecompSet() decompSet {
	m := decompSet{}
	for i := range m {
		m[i] = make(map[string]bool)
	}
	return m
}

// insert adds s to the group with index key.
func (m *decompSet) insert(key int, s string) {
	m[key][s] = true
}
||||
|
||||
func printCharInfoTables(w io.Writer) int { |
||||
mkstr := func(r rune, f *FormInfo) (int, string) { |
||||
d := f.expandedDecomp |
||||
s := string([]rune(d)) |
||||
if max := 1 << 6; len(s) >= max { |
||||
const msg = "%U: too many bytes in decomposition: %d >= %d" |
||||
log.Fatalf(msg, r, len(s), max) |
||||
} |
||||
head := uint8(len(s)) |
||||
if f.quickCheck[MComposed] != QCYes { |
||||
head |= 0x40 |
||||
} |
||||
if f.combinesForward { |
||||
head |= 0x80 |
||||
} |
||||
s = string([]byte{head}) + s |
||||
|
||||
lccc := ccc(d[0]) |
||||
tccc := ccc(d[len(d)-1]) |
||||
cc := ccc(r) |
||||
if cc != 0 && lccc == 0 && tccc == 0 { |
||||
log.Fatalf("%U: trailing and leading ccc are 0 for non-zero ccc %d", r, cc) |
||||
} |
||||
if tccc < lccc && lccc != 0 { |
||||
const msg = "%U: lccc (%d) must be <= tcc (%d)" |
||||
log.Fatalf(msg, r, lccc, tccc) |
||||
} |
||||
index := normalDecomp |
||||
nTrail := chars[r].nTrailingNonStarters |
||||
nLead := chars[r].nLeadingNonStarters |
||||
if tccc > 0 || lccc > 0 || nTrail > 0 { |
||||
tccc <<= 2 |
||||
tccc |= nTrail |
||||
s += string([]byte{tccc}) |
||||
index = endMulti |
||||
for _, r := range d[1:] { |
||||
if ccc(r) == 0 { |
||||
index = firstCCC |
||||
} |
||||
} |
||||
if lccc > 0 || nLead > 0 { |
||||
s += string([]byte{lccc}) |
||||
if index == firstCCC { |
||||
log.Fatalf("%U: multi-segment decomposition not supported for decompositions with leading CCC != 0", r) |
||||
} |
||||
index = firstLeadingCCC |
||||
} |
||||
if cc != lccc { |
||||
if cc != 0 { |
||||
log.Fatalf("%U: for lccc != ccc, expected ccc to be 0; was %d", r, cc) |
||||
} |
||||
index = firstCCCZeroExcept |
||||
} |
||||
} else if len(d) > 1 { |
||||
index = firstMulti |
||||
} |
||||
return index, s |
||||
} |
||||
|
||||
decompSet := makeDecompSet() |
||||
const nLeadStr = "\x00\x01" // 0-byte length and tccc with nTrail.
|
||||
decompSet.insert(firstStarterWithNLead, nLeadStr) |
||||
|
||||
// Store the uniqued decompositions in a byte buffer,
|
||||
// preceded by their byte length.
|
||||
for _, c := range chars { |
||||
for _, f := range c.forms { |
||||
if len(f.expandedDecomp) == 0 { |
||||
continue |
||||
} |
||||
if f.combinesBackward { |
||||
log.Fatalf("%U: combinesBackward and decompose", c.codePoint) |
||||
} |
||||
index, s := mkstr(c.codePoint, &f) |
||||
decompSet.insert(index, s) |
||||
} |
||||
} |
||||
|
||||
decompositions := bytes.NewBuffer(make([]byte, 0, 10000)) |
||||
size := 0 |
||||
positionMap := make(map[string]uint16) |
||||
decompositions.WriteString("\000") |
||||
fmt.Fprintln(w, "const (") |
||||
for i, m := range decompSet { |
||||
sa := []string{} |
||||
for s := range m { |
||||
sa = append(sa, s) |
||||
} |
||||
sort.Strings(sa) |
||||
for _, s := range sa { |
||||
p := decompositions.Len() |
||||
decompositions.WriteString(s) |
||||
positionMap[s] = uint16(p) |
||||
} |
||||
if cname[i] != "" { |
||||
fmt.Fprintf(w, "%s = 0x%X\n", cname[i], decompositions.Len()) |
||||
} |
||||
} |
||||
fmt.Fprintln(w, "maxDecomp = 0x8000") |
||||
fmt.Fprintln(w, ")") |
||||
b := decompositions.Bytes() |
||||
printBytes(w, b, "decomps") |
||||
size += len(b) |
||||
|
||||
varnames := []string{"nfc", "nfkc"} |
||||
for i := 0; i < FNumberOfFormTypes; i++ { |
||||
trie := triegen.NewTrie(varnames[i]) |
||||
|
||||
for r, c := range chars { |
||||
f := c.forms[i] |
||||
d := f.expandedDecomp |
||||
if len(d) != 0 { |
||||
_, key := mkstr(c.codePoint, &f) |
||||
trie.Insert(rune(r), uint64(positionMap[key])) |
||||
if c.ccc != ccc(d[0]) { |
||||
// We assume the lead ccc of a decomposition !=0 in this case.
|
||||
if ccc(d[0]) == 0 { |
||||
log.Fatalf("Expected leading CCC to be non-zero; ccc is %d", c.ccc) |
||||
} |
||||
} |
||||
} else if c.nLeadingNonStarters > 0 && len(f.expandedDecomp) == 0 && c.ccc == 0 && !f.combinesBackward { |
||||
// Handle cases where it can't be detected that the nLead should be equal
|
||||
// to nTrail.
|
||||
trie.Insert(c.codePoint, uint64(positionMap[nLeadStr])) |
||||
} else if v := makeEntry(&f, &c)<<8 | uint16(c.ccc); v != 0 { |
||||
trie.Insert(c.codePoint, uint64(0x8000|v)) |
||||
} |
||||
} |
||||
sz, err := trie.Gen(w, triegen.Compact(&normCompacter{name: varnames[i]})) |
||||
if err != nil { |
||||
log.Fatal(err) |
||||
} |
||||
size += sz |
||||
} |
||||
return size |
||||
} |
||||
|
||||
// contains reports whether s is an element of sa.
func contains(sa []string, s string) bool {
	for _, a := range sa {
		if a == s {
			return true
		}
	}
	return false
}
||||
|
||||
func makeTables() { |
||||
w := &bytes.Buffer{} |
||||
|
||||
size := 0 |
||||
if *tablelist == "" { |
||||
return |
||||
} |
||||
list := strings.Split(*tablelist, ",") |
||||
if *tablelist == "all" { |
||||
list = []string{"recomp", "info"} |
||||
} |
||||
|
||||
// Compute maximum decomposition size.
|
||||
max := 0 |
||||
for _, c := range chars { |
||||
if n := len(string(c.forms[FCompatibility].expandedDecomp)); n > max { |
||||
max = n |
||||
} |
||||
} |
||||
fmt.Fprintln(w, `import "sync"`) |
||||
fmt.Fprintln(w) |
||||
|
||||
fmt.Fprintln(w, "const (") |
||||
fmt.Fprintln(w, "\t// Version is the Unicode edition from which the tables are derived.") |
||||
fmt.Fprintf(w, "\tVersion = %q\n", gen.UnicodeVersion()) |
||||
fmt.Fprintln(w) |
||||
fmt.Fprintln(w, "\t// MaxTransformChunkSize indicates the maximum number of bytes that Transform") |
||||
fmt.Fprintln(w, "\t// may need to write atomically for any Form. Making a destination buffer at") |
||||
fmt.Fprintln(w, "\t// least this size ensures that Transform can always make progress and that") |
||||
fmt.Fprintln(w, "\t// the user does not need to grow the buffer on an ErrShortDst.") |
||||
fmt.Fprintf(w, "\tMaxTransformChunkSize = %d+maxNonStarters*4\n", len(string(0x034F))+max) |
||||
fmt.Fprintln(w, ")\n") |
||||
|
||||
// Print the CCC remap table.
|
||||
size += len(cccMap) |
||||
fmt.Fprintf(w, "var ccc = [%d]uint8{", len(cccMap)) |
||||
for i := 0; i < len(cccMap); i++ { |
||||
if i%8 == 0 { |
||||
fmt.Fprintln(w) |
||||
} |
||||
fmt.Fprintf(w, "%3d, ", cccMap[uint8(i)]) |
||||
} |
||||
fmt.Fprintln(w, "\n}\n") |
||||
|
||||
if contains(list, "info") { |
||||
size += printCharInfoTables(w) |
||||
} |
||||
|
||||
if contains(list, "recomp") { |
||||
// Note that we use 32 bit keys, instead of 64 bit.
|
||||
// This clips the bits of three entries, but we know
|
||||
// this won't cause a collision. The compiler will catch
|
||||
// any changes made to UnicodeData.txt that introduces
|
||||
// a collision.
|
||||
// Note that the recomposition map for NFC and NFKC
|
||||
// are identical.
|
||||
|
||||
// Recomposition map
|
||||
nrentries := 0 |
||||
for _, c := range chars { |
||||
f := c.forms[FCanonical] |
||||
if !f.isOneWay && len(f.decomp) > 0 { |
||||
nrentries++ |
||||
} |
||||
} |
||||
sz := nrentries * 8 |
||||
size += sz |
||||
fmt.Fprintf(w, "// recompMap: %d bytes (entries only)\n", sz) |
||||
fmt.Fprintln(w, "var recompMap map[uint32]rune") |
||||
fmt.Fprintln(w, "var recompMapOnce sync.Once\n") |
||||
fmt.Fprintln(w, `const recompMapPacked = "" +`) |
||||
var buf [8]byte |
||||
for i, c := range chars { |
||||
f := c.forms[FCanonical] |
||||
d := f.decomp |
||||
if !f.isOneWay && len(d) > 0 { |
||||
key := uint32(uint16(d[0]))<<16 + uint32(uint16(d[1])) |
||||
binary.BigEndian.PutUint32(buf[:4], key) |
||||
binary.BigEndian.PutUint32(buf[4:], uint32(i)) |
||||
fmt.Fprintf(w, "\t\t%q + // 0x%.8X: 0x%.8X\n", string(buf[:]), key, uint32(i)) |
||||
} |
||||
} |
||||
// hack so we don't have to special case the trailing plus sign
|
||||
fmt.Fprintf(w, ` ""`) |
||||
fmt.Fprintln(w) |
||||
} |
||||
|
||||
fmt.Fprintf(w, "// Total size of tables: %dKB (%d bytes)\n", (size+512)/1024, size) |
||||
gen.WriteVersionedGoFile("tables.go", "norm", w.Bytes()) |
||||
} |
||||
|
||||
func printChars() { |
||||
if *verbose { |
||||
for _, c := range chars { |
||||
if !c.isValid() || c.state == SMissing { |
||||
continue |
||||
} |
||||
fmt.Println(c) |
||||
} |
||||
} |
||||
} |
||||
|
||||
// verifyComputed does various consistency tests.
|
||||
func verifyComputed() { |
||||
for i, c := range chars { |
||||
for _, f := range c.forms { |
||||
isNo := (f.quickCheck[MDecomposed] == QCNo) |
||||
if (len(f.decomp) > 0) != isNo && !isHangul(rune(i)) { |
||||
log.Fatalf("%U: NF*D QC must be No if rune decomposes", i) |
||||
} |
||||
|
||||
isMaybe := f.quickCheck[MComposed] == QCMaybe |
||||
if f.combinesBackward != isMaybe { |
||||
log.Fatalf("%U: NF*C QC must be Maybe if combinesBackward", i) |
||||
} |
||||
if len(f.decomp) > 0 && f.combinesForward && isMaybe { |
||||
log.Fatalf("%U: NF*C QC must be Yes or No if combinesForward and decomposes", i) |
||||
} |
||||
|
||||
if len(f.expandedDecomp) != 0 { |
||||
continue |
||||
} |
||||
if a, b := c.nLeadingNonStarters > 0, (c.ccc > 0 || f.combinesBackward); a != b { |
||||
// We accept these runes to be treated differently (it only affects
|
||||
// segment breaking in iteration, most likely on improper use), but
|
||||
// reconsider if more characters are added.
|
||||
// U+FF9E HALFWIDTH KATAKANA VOICED SOUND MARK;Lm;0;L;<narrow> 3099;;;;N;;;;;
|
||||
// U+FF9F HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK;Lm;0;L;<narrow> 309A;;;;N;;;;;
|
||||
// U+3133 HANGUL LETTER KIYEOK-SIOS;Lo;0;L;<compat> 11AA;;;;N;HANGUL LETTER GIYEOG SIOS;;;;
|
||||
// U+318E HANGUL LETTER ARAEAE;Lo;0;L;<compat> 11A1;;;;N;HANGUL LETTER ALAE AE;;;;
|
||||
// U+FFA3 HALFWIDTH HANGUL LETTER KIYEOK-SIOS;Lo;0;L;<narrow> 3133;;;;N;HALFWIDTH HANGUL LETTER GIYEOG SIOS;;;;
|
||||
// U+FFDC HALFWIDTH HANGUL LETTER I;Lo;0;L;<narrow> 3163;;;;N;;;;;
|
||||
if i != 0xFF9E && i != 0xFF9F && !(0x3133 <= i && i <= 0x318E) && !(0xFFA3 <= i && i <= 0xFFDC) { |
||||
log.Fatalf("%U: nLead was %v; want %v", i, a, b) |
||||
} |
||||
} |
||||
} |
||||
nfc := c.forms[FCanonical] |
||||
nfkc := c.forms[FCompatibility] |
||||
if nfc.combinesBackward != nfkc.combinesBackward { |
||||
log.Fatalf("%U: Cannot combine combinesBackward\n", c.codePoint) |
||||
} |
||||
} |
||||
} |
||||
|
||||
// Use values in DerivedNormalizationProps.txt to compare against the
|
||||
// values we computed.
|
||||
// DerivedNormalizationProps.txt has form:
|
||||
// 00C0..00C5 ; NFD_QC; N # ...
|
||||
// 0374 ; NFD_QC; N # ...
|
||||
// See https://unicode.org/reports/tr44/ for full explanation
|
||||
func testDerived() { |
||||
f := gen.OpenUCDFile("DerivedNormalizationProps.txt") |
||||
defer f.Close() |
||||
p := ucd.New(f) |
||||
for p.Next() { |
||||
r := p.Rune(0) |
||||
c := &chars[r] |
||||
|
||||
var ftype, mode int |
||||
qt := p.String(1) |
||||
switch qt { |
||||
case "NFC_QC": |
||||
ftype, mode = FCanonical, MComposed |
||||
case "NFD_QC": |
||||
ftype, mode = FCanonical, MDecomposed |
||||
case "NFKC_QC": |
||||
ftype, mode = FCompatibility, MComposed |
||||
case "NFKD_QC": |
||||
ftype, mode = FCompatibility, MDecomposed |
||||
default: |
||||
continue |
||||
} |
||||
var qr QCResult |
||||
switch p.String(2) { |
||||
case "Y": |
||||
qr = QCYes |
||||
case "N": |
||||
qr = QCNo |
||||
case "M": |
||||
qr = QCMaybe |
||||
default: |
||||
log.Fatalf(`Unexpected quick check value "%s"`, p.String(2)) |
||||
} |
||||
if got := c.forms[ftype].quickCheck[mode]; got != qr { |
||||
log.Printf("%U: FAILED %s (was %v need %v)\n", r, qt, got, qr) |
||||
} |
||||
c.forms[ftype].verified[mode] = true |
||||
} |
||||
if err := p.Err(); err != nil { |
||||
log.Fatal(err) |
||||
} |
||||
// Any unspecified value must be QCYes. Verify this.
|
||||
for i, c := range chars { |
||||
for j, fd := range c.forms { |
||||
for k, qr := range fd.quickCheck { |
||||
if !fd.verified[k] && qr != QCYes { |
||||
m := "%U: FAIL F:%d M:%d (was %v need Yes) %s\n" |
||||
log.Printf(m, i, j, k, qr, c.name) |
||||
} |
||||
} |
||||
} |
||||
} |
||||
} |
||||
|
||||
// testHeader is the fixed preamble that printTestdata writes to
// data_test.go. It ends with the opening of the testData slice literal, which
// printTestdata fills in and closes.
var testHeader = `const (
	Yes = iota
	No
	Maybe
)

type formData struct {
	qc              uint8
	combinesForward bool
	decomposition   string
}

type runeData struct {
	r      rune
	ccc    uint8
	nLead  uint8
	nTrail uint8
	f      [2]formData // 0: canonical; 1: compatibility
}

func f(qc uint8, cf bool, dec string) [2]formData {
	return [2]formData{{qc, cf, dec}, {qc, cf, dec}}
}

func g(qc, qck uint8, cf, cfk bool, d, dk string) [2]formData {
	return [2]formData{{qc, cf, d}, {qck, cfk, dk}}
}

var testData = []runeData{
`
||||
|
||||
func printTestdata() { |
||||
type lastInfo struct { |
||||
ccc uint8 |
||||
nLead uint8 |
||||
nTrail uint8 |
||||
f string |
||||
} |
||||
|
||||
last := lastInfo{} |
||||
w := &bytes.Buffer{} |
||||
fmt.Fprintf(w, testHeader) |
||||
for r, c := range chars { |
||||
f := c.forms[FCanonical] |
||||
qc, cf, d := f.quickCheck[MComposed], f.combinesForward, string(f.expandedDecomp) |
||||
f = c.forms[FCompatibility] |
||||
qck, cfk, dk := f.quickCheck[MComposed], f.combinesForward, string(f.expandedDecomp) |
||||
s := "" |
||||
if d == dk && qc == qck && cf == cfk { |
||||
s = fmt.Sprintf("f(%s, %v, %q)", qc, cf, d) |
||||
} else { |
||||
s = fmt.Sprintf("g(%s, %s, %v, %v, %q, %q)", qc, qck, cf, cfk, d, dk) |
||||
} |
||||
current := lastInfo{c.ccc, c.nLeadingNonStarters, c.nTrailingNonStarters, s} |
||||
if last != current { |
||||
fmt.Fprintf(w, "\t{0x%x, %d, %d, %d, %s},\n", r, c.origCCC, c.nLeadingNonStarters, c.nTrailingNonStarters, s) |
||||
last = current |
||||
} |
||||
} |
||||
fmt.Fprintln(w, "}") |
||||
gen.WriteVersionedGoFile("data_test.go", "norm", w.Bytes()) |
||||
} |
@ -0,0 +1,609 @@ |
||||
// Copyright 2011 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// Note: the file data_test.go that is generated should not be checked in.
|
||||
//go:generate go run maketables.go triegen.go
|
||||
//go:generate go test -tags test
|
||||
|
||||
// Package norm contains types and functions for normalizing Unicode strings.
|
||||
package norm // import "golang.org/x/text/unicode/norm"
|
||||
|
||||
import ( |
||||
"unicode/utf8" |
||||
|
||||
"golang.org/x/text/transform" |
||||
) |
||||
|
||||
// A Form denotes a canonical representation of Unicode code points.
// The Unicode-defined normalization and equivalence forms are:
//
//	NFC   Unicode Normalization Form C
//	NFD   Unicode Normalization Form D
//	NFKC  Unicode Normalization Form KC
//	NFKD  Unicode Normalization Form KD
//
// For a Form f, this documentation uses the notation f(x) to mean
// the bytes or string x converted to the given form.
// A position n in x is called a boundary if conversion to the form can
// proceed independently on both sides:
//
//	f(x) == append(f(x[0:n]), f(x[n:])...)
//
// References: https://unicode.org/reports/tr15/ and
// https://unicode.org/notes/tn5/.
type Form int

// The supported normalization forms. The methods of Form use the value as
// the index into formTable.
const (
	NFC Form = iota
	NFD
	NFKC
	NFKD
)
||||
|
||||
// Bytes returns f(b). May return b if f(b) = b.
|
||||
func (f Form) Bytes(b []byte) []byte { |
||||
src := inputBytes(b) |
||||
ft := formTable[f] |
||||
n, ok := ft.quickSpan(src, 0, len(b), true) |
||||
if ok { |
||||
return b |
||||
} |
||||
out := make([]byte, n, len(b)) |
||||
copy(out, b[0:n]) |
||||
rb := reorderBuffer{f: *ft, src: src, nsrc: len(b), out: out, flushF: appendFlush} |
||||
return doAppendInner(&rb, n) |
||||
} |
||||
|
||||
// String returns f(s).
|
||||
func (f Form) String(s string) string { |
||||
src := inputString(s) |
||||
ft := formTable[f] |
||||
n, ok := ft.quickSpan(src, 0, len(s), true) |
||||
if ok { |
||||
return s |
||||
} |
||||
out := make([]byte, n, len(s)) |
||||
copy(out, s[0:n]) |
||||
rb := reorderBuffer{f: *ft, src: src, nsrc: len(s), out: out, flushF: appendFlush} |
||||
return string(doAppendInner(&rb, n)) |
||||
} |
||||
|
||||
// IsNormal returns true if b == f(b).
|
||||
func (f Form) IsNormal(b []byte) bool { |
||||
src := inputBytes(b) |
||||
ft := formTable[f] |
||||
bp, ok := ft.quickSpan(src, 0, len(b), true) |
||||
if ok { |
||||
return true |
||||
} |
||||
rb := reorderBuffer{f: *ft, src: src, nsrc: len(b)} |
||||
rb.setFlusher(nil, cmpNormalBytes) |
||||
for bp < len(b) { |
||||
rb.out = b[bp:] |
||||
if bp = decomposeSegment(&rb, bp, true); bp < 0 { |
||||
return false |
||||
} |
||||
bp, _ = rb.f.quickSpan(rb.src, bp, len(b), true) |
||||
} |
||||
return true |
||||
} |
||||
|
||||
func cmpNormalBytes(rb *reorderBuffer) bool { |
||||
b := rb.out |
||||
for i := 0; i < rb.nrune; i++ { |
||||
info := rb.rune[i] |
||||
if int(info.size) > len(b) { |
||||
return false |
||||
} |
||||
p := info.pos |
||||
pe := p + info.size |
||||
for ; p < pe; p++ { |
||||
if b[0] != rb.byte[p] { |
||||
return false |
||||
} |
||||
b = b[1:] |
||||
} |
||||
} |
||||
return true |
||||
} |
||||
|
||||
// IsNormalString returns true if s == f(s).
|
||||
func (f Form) IsNormalString(s string) bool { |
||||
src := inputString(s) |
||||
ft := formTable[f] |
||||
bp, ok := ft.quickSpan(src, 0, len(s), true) |
||||
if ok { |
||||
return true |
||||
} |
||||
rb := reorderBuffer{f: *ft, src: src, nsrc: len(s)} |
||||
rb.setFlusher(nil, func(rb *reorderBuffer) bool { |
||||
for i := 0; i < rb.nrune; i++ { |
||||
info := rb.rune[i] |
||||
if bp+int(info.size) > len(s) { |
||||
return false |
||||
} |
||||
p := info.pos |
||||
pe := p + info.size |
||||
for ; p < pe; p++ { |
||||
if s[bp] != rb.byte[p] { |
||||
return false |
||||
} |
||||
bp++ |
||||
} |
||||
} |
||||
return true |
||||
}) |
||||
for bp < len(s) { |
||||
if bp = decomposeSegment(&rb, bp, true); bp < 0 { |
||||
return false |
||||
} |
||||
bp, _ = rb.f.quickSpan(rb.src, bp, len(s), true) |
||||
} |
||||
return true |
||||
} |
||||
|
||||
// patchTail fixes a case where a rune may be incorrectly normalized
|
||||
// if it is followed by illegal continuation bytes. It returns the
|
||||
// patched buffer and whether the decomposition is still in progress.
|
||||
func patchTail(rb *reorderBuffer) bool { |
||||
info, p := lastRuneStart(&rb.f, rb.out) |
||||
if p == -1 || info.size == 0 { |
||||
return true |
||||
} |
||||
end := p + int(info.size) |
||||
extra := len(rb.out) - end |
||||
if extra > 0 { |
||||
// Potentially allocating memory. However, this only
|
||||
// happens with ill-formed UTF-8.
|
||||
x := make([]byte, 0) |
||||
x = append(x, rb.out[len(rb.out)-extra:]...) |
||||
rb.out = rb.out[:end] |
||||
decomposeToLastBoundary(rb) |
||||
rb.doFlush() |
||||
rb.out = append(rb.out, x...) |
||||
return false |
||||
} |
||||
buf := rb.out[p:] |
||||
rb.out = rb.out[:p] |
||||
decomposeToLastBoundary(rb) |
||||
if s := rb.ss.next(info); s == ssStarter { |
||||
rb.doFlush() |
||||
rb.ss.first(info) |
||||
} else if s == ssOverflow { |
||||
rb.doFlush() |
||||
rb.insertCGJ() |
||||
rb.ss = 0 |
||||
} |
||||
rb.insertUnsafe(inputBytes(buf), 0, info) |
||||
return true |
||||
} |
||||
|
||||
func appendQuick(rb *reorderBuffer, i int) int { |
||||
if rb.nsrc == i { |
||||
return i |
||||
} |
||||
end, _ := rb.f.quickSpan(rb.src, i, rb.nsrc, true) |
||||
rb.out = rb.src.appendSlice(rb.out, i, end) |
||||
return end |
||||
} |
||||
|
||||
// Append returns f(append(out, b...)).
|
||||
// The buffer out must be nil, empty, or equal to f(out).
|
||||
func (f Form) Append(out []byte, src ...byte) []byte { |
||||
return f.doAppend(out, inputBytes(src), len(src)) |
||||
} |
||||
|
||||
func (f Form) doAppend(out []byte, src input, n int) []byte { |
||||
if n == 0 { |
||||
return out |
||||
} |
||||
ft := formTable[f] |
||||
// Attempt to do a quickSpan first so we can avoid initializing the reorderBuffer.
|
||||
if len(out) == 0 { |
||||
p, _ := ft.quickSpan(src, 0, n, true) |
||||
out = src.appendSlice(out, 0, p) |
||||
if p == n { |
||||
return out |
||||
} |
||||
rb := reorderBuffer{f: *ft, src: src, nsrc: n, out: out, flushF: appendFlush} |
||||
return doAppendInner(&rb, p) |
||||
} |
||||
rb := reorderBuffer{f: *ft, src: src, nsrc: n} |
||||
return doAppend(&rb, out, 0) |
||||
} |
||||
|
||||
func doAppend(rb *reorderBuffer, out []byte, p int) []byte { |
||||
rb.setFlusher(out, appendFlush) |
||||
src, n := rb.src, rb.nsrc |
||||
doMerge := len(out) > 0 |
||||
if q := src.skipContinuationBytes(p); q > p { |
||||
// Move leading non-starters to destination.
|
||||
rb.out = src.appendSlice(rb.out, p, q) |
||||
p = q |
||||
doMerge = patchTail(rb) |
||||
} |
||||
fd := &rb.f |
||||
if doMerge { |
||||
var info Properties |
||||
if p < n { |
||||
info = fd.info(src, p) |
||||
if !info.BoundaryBefore() || info.nLeadingNonStarters() > 0 { |
||||
if p == 0 { |
||||
decomposeToLastBoundary(rb) |
||||
} |
||||
p = decomposeSegment(rb, p, true) |
||||
} |
||||
} |
||||
if info.size == 0 { |
||||
rb.doFlush() |
||||
// Append incomplete UTF-8 encoding.
|
||||
return src.appendSlice(rb.out, p, n) |
||||
} |
||||
if rb.nrune > 0 { |
||||
return doAppendInner(rb, p) |
||||
} |
||||
} |
||||
p = appendQuick(rb, p) |
||||
return doAppendInner(rb, p) |
||||
} |
||||
|
||||
func doAppendInner(rb *reorderBuffer, p int) []byte { |
||||
for n := rb.nsrc; p < n; { |
||||
p = decomposeSegment(rb, p, true) |
||||
p = appendQuick(rb, p) |
||||
} |
||||
return rb.out |
||||
} |
||||
|
||||
// AppendString returns f(append(out, []byte(s))).
|
||||
// The buffer out must be nil, empty, or equal to f(out).
|
||||
func (f Form) AppendString(out []byte, src string) []byte { |
||||
return f.doAppend(out, inputString(src), len(src)) |
||||
} |
||||
|
||||
// QuickSpan returns a boundary n such that b[0:n] == f(b[0:n]).
|
||||
// It is not guaranteed to return the largest such n.
|
||||
func (f Form) QuickSpan(b []byte) int { |
||||
n, _ := formTable[f].quickSpan(inputBytes(b), 0, len(b), true) |
||||
return n |
||||
} |
||||
|
||||
// Span implements transform.SpanningTransformer. It returns a boundary n such
|
||||
// that b[0:n] == f(b[0:n]). It is not guaranteed to return the largest such n.
|
||||
func (f Form) Span(b []byte, atEOF bool) (n int, err error) { |
||||
n, ok := formTable[f].quickSpan(inputBytes(b), 0, len(b), atEOF) |
||||
if n < len(b) { |
||||
if !ok { |
||||
err = transform.ErrEndOfSpan |
||||
} else { |
||||
err = transform.ErrShortSrc |
||||
} |
||||
} |
||||
return n, err |
||||
} |
||||
|
||||
// SpanString returns a boundary n such that s[0:n] == f(s[0:n]).
|
||||
// It is not guaranteed to return the largest such n.
|
||||
func (f Form) SpanString(s string, atEOF bool) (n int, err error) { |
||||
n, ok := formTable[f].quickSpan(inputString(s), 0, len(s), atEOF) |
||||
if n < len(s) { |
||||
if !ok { |
||||
err = transform.ErrEndOfSpan |
||||
} else { |
||||
err = transform.ErrShortSrc |
||||
} |
||||
} |
||||
return n, err |
||||
} |
||||
|
||||
// quickSpan returns a boundary n such that src[0:n] == f(src[0:n]) and
|
||||
// whether any non-normalized parts were found. If atEOF is false, n will
|
||||
// not point past the last segment if this segment might be become
|
||||
// non-normalized by appending other runes.
|
||||
func (f *formInfo) quickSpan(src input, i, end int, atEOF bool) (n int, ok bool) { |
||||
var lastCC uint8 |
||||
ss := streamSafe(0) |
||||
lastSegStart := i |
||||
for n = end; i < n; { |
||||
if j := src.skipASCII(i, n); i != j { |
||||
i = j |
||||
lastSegStart = i - 1 |
||||
lastCC = 0 |
||||
ss = 0 |
||||
continue |
||||
} |
||||
info := f.info(src, i) |
||||
if info.size == 0 { |
||||
if atEOF { |
||||
// include incomplete runes
|
||||
return n, true |
||||
} |
||||
return lastSegStart, true |
||||
} |
||||
// This block needs to be before the next, because it is possible to
|
||||
// have an overflow for runes that are starters (e.g. with U+FF9E).
|
||||
switch ss.next(info) { |
||||
case ssStarter: |
||||
lastSegStart = i |
||||
case ssOverflow: |
||||
return lastSegStart, false |
||||
case ssSuccess: |
||||
if lastCC > info.ccc { |
||||
return lastSegStart, false |
||||
} |
||||
} |
||||
if f.composing { |
||||
if !info.isYesC() { |
||||
break |
||||
} |
||||
} else { |
||||
if !info.isYesD() { |
||||
break |
||||
} |
||||
} |
||||
lastCC = info.ccc |
||||
i += int(info.size) |
||||
} |
||||
if i == n { |
||||
if !atEOF { |
||||
n = lastSegStart |
||||
} |
||||
return n, true |
||||
} |
||||
return lastSegStart, false |
||||
} |
||||
|
||||
// QuickSpanString returns a boundary n such that s[0:n] == f(s[0:n]).
|
||||
// It is not guaranteed to return the largest such n.
|
||||
func (f Form) QuickSpanString(s string) int { |
||||
n, _ := formTable[f].quickSpan(inputString(s), 0, len(s), true) |
||||
return n |
||||
} |
||||
|
||||
// FirstBoundary returns the position i of the first boundary in b
|
||||
// or -1 if b contains no boundary.
|
||||
func (f Form) FirstBoundary(b []byte) int { |
||||
return f.firstBoundary(inputBytes(b), len(b)) |
||||
} |
||||
|
||||
func (f Form) firstBoundary(src input, nsrc int) int { |
||||
i := src.skipContinuationBytes(0) |
||||
if i >= nsrc { |
||||
return -1 |
||||
} |
||||
fd := formTable[f] |
||||
ss := streamSafe(0) |
||||
// We should call ss.first here, but we can't as the first rune is
|
||||
// skipped already. This means FirstBoundary can't really determine
|
||||
// CGJ insertion points correctly. Luckily it doesn't have to.
|
||||
for { |
||||
info := fd.info(src, i) |
||||
if info.size == 0 { |
||||
return -1 |
||||
} |
||||
if s := ss.next(info); s != ssSuccess { |
||||
return i |
||||
} |
||||
i += int(info.size) |
||||
if i >= nsrc { |
||||
if !info.BoundaryAfter() && !ss.isMax() { |
||||
return -1 |
||||
} |
||||
return nsrc |
||||
} |
||||
} |
||||
} |
||||
|
||||
// FirstBoundaryInString returns the position i of the first boundary in s
|
||||
// or -1 if s contains no boundary.
|
||||
func (f Form) FirstBoundaryInString(s string) int { |
||||
return f.firstBoundary(inputString(s), len(s)) |
||||
} |
||||
|
||||
// NextBoundary reports the index of the boundary between the first and next
|
||||
// segment in b or -1 if atEOF is false and there are not enough bytes to
|
||||
// determine this boundary.
|
||||
func (f Form) NextBoundary(b []byte, atEOF bool) int { |
||||
return f.nextBoundary(inputBytes(b), len(b), atEOF) |
||||
} |
||||
|
||||
// NextBoundaryInString reports the index of the boundary between the first and
|
||||
// next segment in b or -1 if atEOF is false and there are not enough bytes to
|
||||
// determine this boundary.
|
||||
func (f Form) NextBoundaryInString(s string, atEOF bool) int { |
||||
return f.nextBoundary(inputString(s), len(s), atEOF) |
||||
} |
||||
|
||||
func (f Form) nextBoundary(src input, nsrc int, atEOF bool) int { |
||||
if nsrc == 0 { |
||||
if atEOF { |
||||
return 0 |
||||
} |
||||
return -1 |
||||
} |
||||
fd := formTable[f] |
||||
info := fd.info(src, 0) |
||||
if info.size == 0 { |
||||
if atEOF { |
||||
return 1 |
||||
} |
||||
return -1 |
||||
} |
||||
ss := streamSafe(0) |
||||
ss.first(info) |
||||
|
||||
for i := int(info.size); i < nsrc; i += int(info.size) { |
||||
info = fd.info(src, i) |
||||
if info.size == 0 { |
||||
if atEOF { |
||||
return i |
||||
} |
||||
return -1 |
||||
} |
||||
// TODO: Using streamSafe to determine the boundary isn't the same as
|
||||
// using BoundaryBefore. Determine which should be used.
|
||||
if s := ss.next(info); s != ssSuccess { |
||||
return i |
||||
} |
||||
} |
||||
if !atEOF && !info.BoundaryAfter() && !ss.isMax() { |
||||
return -1 |
||||
} |
||||
return nsrc |
||||
} |
||||
|
||||
// LastBoundary returns the position i of the last boundary in b
|
||||
// or -1 if b contains no boundary.
|
||||
func (f Form) LastBoundary(b []byte) int { |
||||
return lastBoundary(formTable[f], b) |
||||
} |
||||
|
||||
func lastBoundary(fd *formInfo, b []byte) int { |
||||
i := len(b) |
||||
info, p := lastRuneStart(fd, b) |
||||
if p == -1 { |
||||
return -1 |
||||
} |
||||
if info.size == 0 { // ends with incomplete rune
|
||||
if p == 0 { // starts with incomplete rune
|
||||
return -1 |
||||
} |
||||
i = p |
||||
info, p = lastRuneStart(fd, b[:i]) |
||||
if p == -1 { // incomplete UTF-8 encoding or non-starter bytes without a starter
|
||||
return i |
||||
} |
||||
} |
||||
if p+int(info.size) != i { // trailing non-starter bytes: illegal UTF-8
|
||||
return i |
||||
} |
||||
if info.BoundaryAfter() { |
||||
return i |
||||
} |
||||
ss := streamSafe(0) |
||||
v := ss.backwards(info) |
||||
for i = p; i >= 0 && v != ssStarter; i = p { |
||||
info, p = lastRuneStart(fd, b[:i]) |
||||
if v = ss.backwards(info); v == ssOverflow { |
||||
break |
||||
} |
||||
if p+int(info.size) != i { |
||||
if p == -1 { // no boundary found
|
||||
return -1 |
||||
} |
||||
return i // boundary after an illegal UTF-8 encoding
|
||||
} |
||||
} |
||||
return i |
||||
} |
||||
|
||||
// decomposeSegment scans the first segment in src into rb. It inserts 0x034f
|
||||
// (Grapheme Joiner) when it encounters a sequence of more than 30 non-starters
|
||||
// and returns the number of bytes consumed from src or iShortDst or iShortSrc.
|
||||
func decomposeSegment(rb *reorderBuffer, sp int, atEOF bool) int { |
||||
// Force one character to be consumed.
|
||||
info := rb.f.info(rb.src, sp) |
||||
if info.size == 0 { |
||||
return 0 |
||||
} |
||||
if s := rb.ss.next(info); s == ssStarter { |
||||
// TODO: this could be removed if we don't support merging.
|
||||
if rb.nrune > 0 { |
||||
goto end |
||||
} |
||||
} else if s == ssOverflow { |
||||
rb.insertCGJ() |
||||
goto end |
||||
} |
||||
if err := rb.insertFlush(rb.src, sp, info); err != iSuccess { |
||||
return int(err) |
||||
} |
||||
for { |
||||
sp += int(info.size) |
||||
if sp >= rb.nsrc { |
||||
if !atEOF && !info.BoundaryAfter() { |
||||
return int(iShortSrc) |
||||
} |
||||
break |
||||
} |
||||
info = rb.f.info(rb.src, sp) |
||||
if info.size == 0 { |
||||
if !atEOF { |
||||
return int(iShortSrc) |
||||
} |
||||
break |
||||
} |
||||
if s := rb.ss.next(info); s == ssStarter { |
||||
break |
||||
} else if s == ssOverflow { |
||||
rb.insertCGJ() |
||||
break |
||||
} |
||||
if err := rb.insertFlush(rb.src, sp, info); err != iSuccess { |
||||
return int(err) |
||||
} |
||||
} |
||||
end: |
||||
if !rb.doFlush() { |
||||
return int(iShortDst) |
||||
} |
||||
return sp |
||||
} |
||||
|
||||
// lastRuneStart returns the runeInfo and position of the last
|
||||
// rune in buf or the zero runeInfo and -1 if no rune was found.
|
||||
func lastRuneStart(fd *formInfo, buf []byte) (Properties, int) { |
||||
p := len(buf) - 1 |
||||
for ; p >= 0 && !utf8.RuneStart(buf[p]); p-- { |
||||
} |
||||
if p < 0 { |
||||
return Properties{}, -1 |
||||
} |
||||
return fd.info(inputBytes(buf), p), p |
||||
} |
||||
|
||||
// decomposeToLastBoundary finds an open segment at the end of the buffer
|
||||
// and scans it into rb. Returns the buffer minus the last segment.
|
||||
func decomposeToLastBoundary(rb *reorderBuffer) { |
||||
fd := &rb.f |
||||
info, i := lastRuneStart(fd, rb.out) |
||||
if int(info.size) != len(rb.out)-i { |
||||
// illegal trailing continuation bytes
|
||||
return |
||||
} |
||||
if info.BoundaryAfter() { |
||||
return |
||||
} |
||||
var add [maxNonStarters + 1]Properties // stores runeInfo in reverse order
|
||||
padd := 0 |
||||
ss := streamSafe(0) |
||||
p := len(rb.out) |
||||
for { |
||||
add[padd] = info |
||||
v := ss.backwards(info) |
||||
if v == ssOverflow { |
||||
// Note that if we have an overflow, it the string we are appending to
|
||||
// is not correctly normalized. In this case the behavior is undefined.
|
||||
break |
||||
} |
||||
padd++ |
||||
p -= int(info.size) |
||||
if v == ssStarter || p < 0 { |
||||
break |
||||
} |
||||
info, i = lastRuneStart(fd, rb.out[:p]) |
||||
if int(info.size) != p-i { |
||||
break |
||||
} |
||||
} |
||||
rb.ss = ss |
||||
// Copy bytes for insertion as we may need to overwrite rb.out.
|
||||
var buf [maxBufferSize * utf8.UTFMax]byte |
||||
cp := buf[:copy(buf[:], rb.out[p:])] |
||||
rb.out = rb.out[:p] |
||||
for padd--; padd >= 0; padd-- { |
||||
info = add[padd] |
||||
rb.insertUnsafe(inputBytes(cp), 0, info) |
||||
cp = cp[info.size:] |
||||
} |
||||
} |
@ -0,0 +1,125 @@ |
||||
// Copyright 2011 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package norm |
||||
|
||||
import "io" |
||||
|
||||
type normWriter struct { |
||||
rb reorderBuffer |
||||
w io.Writer |
||||
buf []byte |
||||
} |
||||
|
||||
// Write implements the standard write interface. If the last characters are
|
||||
// not at a normalization boundary, the bytes will be buffered for the next
|
||||
// write. The remaining bytes will be written on close.
|
||||
func (w *normWriter) Write(data []byte) (n int, err error) { |
||||
// Process data in pieces to keep w.buf size bounded.
|
||||
const chunk = 4000 |
||||
|
||||
for len(data) > 0 { |
||||
// Normalize into w.buf.
|
||||
m := len(data) |
||||
if m > chunk { |
||||
m = chunk |
||||
} |
||||
w.rb.src = inputBytes(data[:m]) |
||||
w.rb.nsrc = m |
||||
w.buf = doAppend(&w.rb, w.buf, 0) |
||||
data = data[m:] |
||||
n += m |
||||
|
||||
// Write out complete prefix, save remainder.
|
||||
// Note that lastBoundary looks back at most 31 runes.
|
||||
i := lastBoundary(&w.rb.f, w.buf) |
||||
if i == -1 { |
||||
i = 0 |
||||
} |
||||
if i > 0 { |
||||
if _, err = w.w.Write(w.buf[:i]); err != nil { |
||||
break |
||||
} |
||||
bn := copy(w.buf, w.buf[i:]) |
||||
w.buf = w.buf[:bn] |
||||
} |
||||
} |
||||
return n, err |
||||
} |
||||
|
||||
// Close forces data that remains in the buffer to be written.
|
||||
func (w *normWriter) Close() error { |
||||
if len(w.buf) > 0 { |
||||
_, err := w.w.Write(w.buf) |
||||
if err != nil { |
||||
return err |
||||
} |
||||
} |
||||
return nil |
||||
} |
||||
|
||||
// Writer returns a new writer that implements Write(b)
|
||||
// by writing f(b) to w. The returned writer may use an
|
||||
// internal buffer to maintain state across Write calls.
|
||||
// Calling its Close method writes any buffered data to w.
|
||||
func (f Form) Writer(w io.Writer) io.WriteCloser { |
||||
wr := &normWriter{rb: reorderBuffer{}, w: w} |
||||
wr.rb.init(f, nil) |
||||
return wr |
||||
} |
||||
|
||||
type normReader struct { |
||||
rb reorderBuffer |
||||
r io.Reader |
||||
inbuf []byte |
||||
outbuf []byte |
||||
bufStart int |
||||
lastBoundary int |
||||
err error |
||||
} |
||||
|
||||
// Read implements the standard read interface.
|
||||
func (r *normReader) Read(p []byte) (int, error) { |
||||
for { |
||||
if r.lastBoundary-r.bufStart > 0 { |
||||
n := copy(p, r.outbuf[r.bufStart:r.lastBoundary]) |
||||
r.bufStart += n |
||||
if r.lastBoundary-r.bufStart > 0 { |
||||
return n, nil |
||||
} |
||||
return n, r.err |
||||
} |
||||
if r.err != nil { |
||||
return 0, r.err |
||||
} |
||||
outn := copy(r.outbuf, r.outbuf[r.lastBoundary:]) |
||||
r.outbuf = r.outbuf[0:outn] |
||||
r.bufStart = 0 |
||||
|
||||
n, err := r.r.Read(r.inbuf) |
||||
r.rb.src = inputBytes(r.inbuf[0:n]) |
||||
r.rb.nsrc, r.err = n, err |
||||
if n > 0 { |
||||
r.outbuf = doAppend(&r.rb, r.outbuf, 0) |
||||
} |
||||
if err == io.EOF { |
||||
r.lastBoundary = len(r.outbuf) |
||||
} else { |
||||
r.lastBoundary = lastBoundary(&r.rb.f, r.outbuf) |
||||
if r.lastBoundary == -1 { |
||||
r.lastBoundary = 0 |
||||
} |
||||
} |
||||
} |
||||
} |
||||
|
||||
// Reader returns a new reader that implements Read
|
||||
// by reading data from r and returning f(data).
|
||||
func (f Form) Reader(r io.Reader) io.Reader { |
||||
const chunk = 4000 |
||||
buf := make([]byte, chunk) |
||||
rr := &normReader{rb: reorderBuffer{}, r: r, inbuf: buf} |
||||
rr.rb.init(f, buf) |
||||
return rr |
||||
} |
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,88 @@ |
||||
// Copyright 2013 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package norm |
||||
|
||||
import ( |
||||
"unicode/utf8" |
||||
|
||||
"golang.org/x/text/transform" |
||||
) |
||||
|
||||
// Reset implements the Reset method of the transform.Transformer interface.
|
||||
func (Form) Reset() {} |
||||
|
||||
// Transform implements the Transform method of the transform.Transformer
|
||||
// interface. It may need to write segments of up to MaxSegmentSize at once.
|
||||
// Users should either catch ErrShortDst and allow dst to grow or have dst be at
|
||||
// least of size MaxTransformChunkSize to be guaranteed of progress.
|
||||
func (f Form) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) { |
||||
// Cap the maximum number of src bytes to check.
|
||||
b := src |
||||
eof := atEOF |
||||
if ns := len(dst); ns < len(b) { |
||||
err = transform.ErrShortDst |
||||
eof = false |
||||
b = b[:ns] |
||||
} |
||||
i, ok := formTable[f].quickSpan(inputBytes(b), 0, len(b), eof) |
||||
n := copy(dst, b[:i]) |
||||
if !ok { |
||||
nDst, nSrc, err = f.transform(dst[n:], src[n:], atEOF) |
||||
return nDst + n, nSrc + n, err |
||||
} |
||||
|
||||
if err == nil && n < len(src) && !atEOF { |
||||
err = transform.ErrShortSrc |
||||
} |
||||
return n, n, err |
||||
} |
||||
|
||||
func flushTransform(rb *reorderBuffer) bool { |
||||
// Write out (must fully fit in dst, or else it is an ErrShortDst).
|
||||
if len(rb.out) < rb.nrune*utf8.UTFMax { |
||||
return false |
||||
} |
||||
rb.out = rb.out[rb.flushCopy(rb.out):] |
||||
return true |
||||
} |
||||
|
||||
var errs = []error{nil, transform.ErrShortDst, transform.ErrShortSrc} |
||||
|
||||
// transform implements the transform.Transformer interface. It is only called
|
||||
// when quickSpan does not pass for a given string.
|
||||
func (f Form) transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) { |
||||
// TODO: get rid of reorderBuffer. See CL 23460044.
|
||||
rb := reorderBuffer{} |
||||
rb.init(f, src) |
||||
for { |
||||
// Load segment into reorder buffer.
|
||||
rb.setFlusher(dst[nDst:], flushTransform) |
||||
end := decomposeSegment(&rb, nSrc, atEOF) |
||||
if end < 0 { |
||||
return nDst, nSrc, errs[-end] |
||||
} |
||||
nDst = len(dst) - len(rb.out) |
||||
nSrc = end |
||||
|
||||
// Next quickSpan.
|
||||
end = rb.nsrc |
||||
eof := atEOF |
||||
if n := nSrc + len(dst) - nDst; n < end { |
||||
err = transform.ErrShortDst |
||||
end = n |
||||
eof = false |
||||
} |
||||
end, ok := rb.f.quickSpan(rb.src, nSrc, end, eof) |
||||
n := copy(dst[nDst:], rb.src.bytes[nSrc:end]) |
||||
nSrc += n |
||||
nDst += n |
||||
if ok { |
||||
if err == nil && n < rb.nsrc && !atEOF { |
||||
err = transform.ErrShortSrc |
||||
} |
||||
return nDst, nSrc, err |
||||
} |
||||
} |
||||
} |
@ -0,0 +1,54 @@ |
||||
// Copyright 2011 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package norm |
||||
|
||||
// valueRange is one entry of a sparse block. The first entry of a block acts
// as a header and reuses the fields with a different meaning, as noted on
// each field.
type valueRange struct {
	// value is the value at lo for a range entry; header: value:stride
	value uint16
	// lo and hi are the inclusive byte bounds of a range entry; header: lo:n
	lo, hi byte
}
||||
|
||||
type sparseBlocks struct { |
||||
values []valueRange |
||||
offset []uint16 |
||||
} |
||||
|
||||
var nfcSparse = sparseBlocks{ |
||||
values: nfcSparseValues[:], |
||||
offset: nfcSparseOffset[:], |
||||
} |
||||
|
||||
var nfkcSparse = sparseBlocks{ |
||||
values: nfkcSparseValues[:], |
||||
offset: nfkcSparseOffset[:], |
||||
} |
||||
|
||||
var ( |
||||
nfcData = newNfcTrie(0) |
||||
nfkcData = newNfkcTrie(0) |
||||
) |
||||
|
||||
// lookupValue determines the type of block n and looks up the value for b.
|
||||
// For n < t.cutoff, the block is a simple lookup table. Otherwise, the block
|
||||
// is a list of ranges with an accompanying value. Given a matching range r,
|
||||
// the value for b is by r.value + (b - r.lo) * stride.
|
||||
func (t *sparseBlocks) lookup(n uint32, b byte) uint16 { |
||||
offset := t.offset[n] |
||||
header := t.values[offset] |
||||
lo := offset + 1 |
||||
hi := lo + uint16(header.lo) |
||||
for lo < hi { |
||||
m := lo + (hi-lo)/2 |
||||
r := t.values[m] |
||||
if r.lo <= b && b <= r.hi { |
||||
return r.value + uint16(b-r.lo)*header.value |
||||
} |
||||
if b < r.lo { |
||||
hi = m |
||||
} else { |
||||
lo = m + 1 |
||||
} |
||||
} |
||||
return 0 |
||||
} |
@ -0,0 +1,117 @@ |
||||
// Copyright 2011 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build ignore
|
||||
|
||||
// Trie table generator.
|
||||
// Used by make*tables tools to generate a go file with trie data structures
|
||||
// for mapping UTF-8 to a 16-bit value. All but the last byte in a UTF-8 byte
|
||||
// sequence are used to lookup offsets in the index table to be used for the
|
||||
// next byte. The last byte is used to index into a table with 16-bit values.
|
||||
|
||||
package main |
||||
|
||||
import ( |
||||
"fmt" |
||||
"io" |
||||
) |
||||
|
||||
// maxSparseEntries is the largest number of sparse entries a block may have
// for normCompacter.Size to accept it.
const maxSparseEntries = 16

// normCompacter collects blocks of values to be emitted as sparse tables.
type normCompacter struct {
	// sparseBlocks holds the stored blocks in the order they were added.
	sparseBlocks [][]uint64
	// sparseOffset holds, per stored block, the value of sparseCount at the
	// time the block was added.
	sparseOffset []uint16
	// sparseCount is the running total of table entries for all stored
	// blocks.
	sparseCount int
	// name prefixes the identifiers of the generated tables.
	name string
}
||||
|
||||
// mostFrequentStride returns the most common non-negative difference between
// consecutive values of a, ignoring differences measured from a zero value.
// Ties are resolved in favor of the smallest stride. It returns 0 when no
// difference qualifies.
func mostFrequentStride(a []uint64) int {
	freq := make(map[int]int)
	prev := 0
	for _, x := range a {
		cur := int(x)
		if d := cur - prev; prev != 0 && d >= 0 {
			freq[d]++
		}
		prev = cur
	}

	best, bestCount := 0, 0
	for d, n := range freq {
		// Map iteration order is random; the tie-break keeps the result
		// deterministic.
		switch {
		case n > bestCount, n == bestCount && d < best:
			best, bestCount = d, n
		}
	}
	return best
}
||||
|
||||
func countSparseEntries(a []uint64) int { |
||||
stride := mostFrequentStride(a) |
||||
var v, count int |
||||
for _, tv := range a { |
||||
if int(tv)-v != stride { |
||||
if tv != 0 { |
||||
count++ |
||||
} |
||||
} |
||||
v = int(tv) |
||||
} |
||||
return count |
||||
} |
||||
|
||||
func (c *normCompacter) Size(v []uint64) (sz int, ok bool) { |
||||
if n := countSparseEntries(v); n <= maxSparseEntries { |
||||
return (n+1)*4 + 2, true |
||||
} |
||||
return 0, false |
||||
} |
||||
|
||||
func (c *normCompacter) Store(v []uint64) uint32 { |
||||
h := uint32(len(c.sparseOffset)) |
||||
c.sparseBlocks = append(c.sparseBlocks, v) |
||||
c.sparseOffset = append(c.sparseOffset, uint16(c.sparseCount)) |
||||
c.sparseCount += countSparseEntries(v) + 1 |
||||
return h |
||||
} |
||||
|
||||
func (c *normCompacter) Handler() string { |
||||
return c.name + "Sparse.lookup" |
||||
} |
||||
|
||||
func (c *normCompacter) Print(w io.Writer) (retErr error) { |
||||
p := func(f string, x ...interface{}) { |
||||
if _, err := fmt.Fprintf(w, f, x...); retErr == nil && err != nil { |
||||
retErr = err |
||||
} |
||||
} |
||||
|
||||
ls := len(c.sparseBlocks) |
||||
p("// %sSparseOffset: %d entries, %d bytes\n", c.name, ls, ls*2) |
||||
p("var %sSparseOffset = %#v\n\n", c.name, c.sparseOffset) |
||||
|
||||
ns := c.sparseCount |
||||
p("// %sSparseValues: %d entries, %d bytes\n", c.name, ns, ns*4) |
||||
p("var %sSparseValues = [%d]valueRange {", c.name, ns) |
||||
for i, b := range c.sparseBlocks { |
||||
p("\n// Block %#x, offset %#x", i, c.sparseOffset[i]) |
||||
var v int |
||||
stride := mostFrequentStride(b) |
||||
n := countSparseEntries(b) |
||||
p("\n{value:%#04x,lo:%#02x},", stride, uint8(n)) |
||||
for i, nv := range b { |
||||
if int(nv)-v != stride { |
||||
if v != 0 { |
||||
p(",hi:%#02x},", 0x80+i-1) |
||||
} |
||||
if nv != 0 { |
||||
p("\n{value:%#04x,lo:%#02x", nv, 0x80+i) |
||||
} |
||||
} |
||||
v = int(nv) |
||||
} |
||||
if v != 0 { |
||||
p(",hi:%#02x},", 0x80+len(b)-1) |
||||
} |
||||
} |
||||
p("\n}\n\n") |
||||
return |
||||
} |
Loading…
Reference in new issue