libgo: update to go1.7rc3
Reviewed-on: https://go-review.googlesource.com/25150 From-SVN: r238662
This commit is contained in:
parent
9d04a3af4c
commit
22b955cca5
1155 changed files with 51833 additions and 16672 deletions
|
@ -36,7 +36,6 @@ type bitState struct {
|
|||
|
||||
end int
|
||||
cap []int
|
||||
input input
|
||||
jobs []job
|
||||
visited []uint32
|
||||
}
|
||||
|
@ -146,7 +145,7 @@ func (m *machine) tryBacktrack(b *bitState, i input, pc uint32, pos int) bool {
|
|||
// Optimization: rather than push and pop,
|
||||
// code that is going to Push and continue
|
||||
// the loop simply updates ip, p, and arg
|
||||
// and jumps to CheckAndLoop. We have to
|
||||
// and jumps to CheckAndLoop. We have to
|
||||
// do the ShouldVisit check that Push
|
||||
// would have, but we avoid the stack
|
||||
// manipulation.
|
||||
|
@ -254,7 +253,6 @@ func (m *machine) tryBacktrack(b *bitState, i input, pc uint32, pos int) bool {
|
|||
|
||||
}
|
||||
panic("bad arg in InstCapture")
|
||||
continue
|
||||
|
||||
case syntax.InstEmptyWidth:
|
||||
if syntax.EmptyOp(inst.Arg)&^i.context(pos) != 0 {
|
||||
|
@ -299,7 +297,6 @@ func (m *machine) tryBacktrack(b *bitState, i input, pc uint32, pos int) bool {
|
|||
// Otherwise, continue on in hope of a longer match.
|
||||
continue
|
||||
}
|
||||
panic("unreachable")
|
||||
}
|
||||
|
||||
return m.matched
|
||||
|
|
|
@ -19,7 +19,7 @@ type queue struct {
|
|||
// A entry is an entry on a queue.
|
||||
// It holds both the instruction pc and the actual thread.
|
||||
// Some queue entries are just place holders so that the machine
|
||||
// knows it has considered that pc. Such entries have t == nil.
|
||||
// knows it has considered that pc. Such entries have t == nil.
|
||||
type entry struct {
|
||||
pc uint32
|
||||
t *thread
|
||||
|
@ -107,14 +107,6 @@ func (m *machine) alloc(i *syntax.Inst) *thread {
|
|||
return t
|
||||
}
|
||||
|
||||
// free returns t to the free pool.
|
||||
func (m *machine) free(t *thread) {
|
||||
m.inputBytes.str = nil
|
||||
m.inputString.str = ""
|
||||
m.inputReader.r = nil
|
||||
m.pool = append(m.pool, t)
|
||||
}
|
||||
|
||||
// match runs the machine over the input starting at pos.
|
||||
// It reports whether a match was found.
|
||||
// If so, m.matchcap holds the submatch information.
|
||||
|
@ -192,7 +184,6 @@ func (m *machine) match(i input, pos int) bool {
|
|||
func (m *machine) clear(q *queue) {
|
||||
for _, d := range q.dense {
|
||||
if d.t != nil {
|
||||
// m.free(d.t)
|
||||
m.pool = append(m.pool, d.t)
|
||||
}
|
||||
}
|
||||
|
@ -213,7 +204,6 @@ func (m *machine) step(runq, nextq *queue, pos, nextPos int, c rune, nextCond sy
|
|||
continue
|
||||
}
|
||||
if longest && m.matched && len(t.cap) > 0 && m.matchcap[0] < t.cap[0] {
|
||||
// m.free(t)
|
||||
m.pool = append(m.pool, t)
|
||||
continue
|
||||
}
|
||||
|
@ -232,7 +222,6 @@ func (m *machine) step(runq, nextq *queue, pos, nextPos int, c rune, nextCond sy
|
|||
// First-match mode: cut off all lower-priority threads.
|
||||
for _, d := range runq.dense[j+1:] {
|
||||
if d.t != nil {
|
||||
// m.free(d.t)
|
||||
m.pool = append(m.pool, d.t)
|
||||
}
|
||||
}
|
||||
|
@ -253,7 +242,6 @@ func (m *machine) step(runq, nextq *queue, pos, nextPos int, c rune, nextCond sy
|
|||
t = m.add(nextq, i.Out, nextPos, t.cap, nextCond, t)
|
||||
}
|
||||
if t != nil {
|
||||
// m.free(t)
|
||||
m.pool = append(m.pool, t)
|
||||
}
|
||||
}
|
||||
|
|
|
@ -22,7 +22,7 @@ import (
|
|||
// considered during RE2's exhaustive tests, which run all possible
|
||||
// regexps over a given set of atoms and operators, up to a given
|
||||
// complexity, over all possible strings over a given alphabet,
|
||||
// up to a given size. Rather than try to link with RE2, we read a
|
||||
// up to a given size. Rather than try to link with RE2, we read a
|
||||
// log file containing the test cases and the expected matches.
|
||||
// The log file, re2-exhaustive.txt, is generated by running 'make log'
|
||||
// in the open source RE2 distribution https://github.com/google/re2/.
|
||||
|
@ -41,21 +41,21 @@ import (
|
|||
// -;0-3 0-1 1-2 2-3
|
||||
//
|
||||
// The stanza begins by defining a set of strings, quoted
|
||||
// using Go double-quote syntax, one per line. Then the
|
||||
// using Go double-quote syntax, one per line. Then the
|
||||
// regexps section gives a sequence of regexps to run on
|
||||
// the strings. In the block that follows a regexp, each line
|
||||
// the strings. In the block that follows a regexp, each line
|
||||
// gives the semicolon-separated match results of running
|
||||
// the regexp on the corresponding string.
|
||||
// Each match result is either a single -, meaning no match, or a
|
||||
// space-separated sequence of pairs giving the match and
|
||||
// submatch indices. An unmatched subexpression formats
|
||||
// submatch indices. An unmatched subexpression formats
|
||||
// its pair as a single - (not illustrated above). For now
|
||||
// each regexp run produces two match results, one for a
|
||||
// ``full match'' that restricts the regexp to matching the entire
|
||||
// string or nothing, and one for a ``partial match'' that gives
|
||||
// the leftmost first match found in the string.
|
||||
//
|
||||
// Lines beginning with # are comments. Lines beginning with
|
||||
// Lines beginning with # are comments. Lines beginning with
|
||||
// a capital letter are test names printed during RE2's test suite
|
||||
// and are echoed into t but otherwise ignored.
|
||||
//
|
||||
|
@ -155,9 +155,9 @@ func testRE2(t *testing.T, file string) {
|
|||
if !isSingleBytes(text) && strings.Contains(re.String(), `\B`) {
|
||||
// RE2's \B considers every byte position,
|
||||
// so it sees 'not word boundary' in the
|
||||
// middle of UTF-8 sequences. This package
|
||||
// middle of UTF-8 sequences. This package
|
||||
// only considers the positions between runes,
|
||||
// so it disagrees. Skip those cases.
|
||||
// so it disagrees. Skip those cases.
|
||||
continue
|
||||
}
|
||||
res := strings.Split(line, ";")
|
||||
|
@ -409,7 +409,7 @@ Reading:
|
|||
// h REG_MULTIREF multiple digit backref
|
||||
// i REG_ICASE ignore case
|
||||
// j REG_SPAN . matches \n
|
||||
// k REG_ESCAPE \ to ecape [...] delimiter
|
||||
// k REG_ESCAPE \ to escape [...] delimiter
|
||||
// l REG_LEFT implicit ^...
|
||||
// m REG_MINIMAL minimal match
|
||||
// n REG_NEWLINE explicit \n match
|
||||
|
@ -658,47 +658,42 @@ func makeText(n int) []byte {
|
|||
return text
|
||||
}
|
||||
|
||||
func benchmark(b *testing.B, re string, n int) {
|
||||
r := MustCompile(re)
|
||||
t := makeText(n)
|
||||
b.ResetTimer()
|
||||
b.SetBytes(int64(n))
|
||||
for i := 0; i < b.N; i++ {
|
||||
if r.Match(t) {
|
||||
b.Fatal("match!")
|
||||
func BenchmarkMatch(b *testing.B) {
|
||||
for _, data := range benchData {
|
||||
r := MustCompile(data.re)
|
||||
for _, size := range benchSizes {
|
||||
t := makeText(size.n)
|
||||
b.Run(data.name+"/"+size.name, func(b *testing.B) {
|
||||
b.SetBytes(int64(size.n))
|
||||
for i := 0; i < b.N; i++ {
|
||||
if r.Match(t) {
|
||||
b.Fatal("match!")
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const (
|
||||
easy0 = "ABCDEFGHIJKLMNOPQRSTUVWXYZ$"
|
||||
easy1 = "A[AB]B[BC]C[CD]D[DE]E[EF]F[FG]G[GH]H[HI]I[IJ]J$"
|
||||
medium = "[XYZ]ABCDEFGHIJKLMNOPQRSTUVWXYZ$"
|
||||
hard = "[ -~]*ABCDEFGHIJKLMNOPQRSTUVWXYZ$"
|
||||
parens = "([ -~])*(A)(B)(C)(D)(E)(F)(G)(H)(I)(J)(K)(L)(M)" +
|
||||
"(N)(O)(P)(Q)(R)(S)(T)(U)(V)(W)(X)(Y)(Z)$"
|
||||
)
|
||||
var benchData = []struct{ name, re string }{
|
||||
{"Easy0", "ABCDEFGHIJKLMNOPQRSTUVWXYZ$"},
|
||||
{"Easy0i", "(?i)ABCDEFGHIJklmnopqrstuvwxyz$"},
|
||||
{"Easy1", "A[AB]B[BC]C[CD]D[DE]E[EF]F[FG]G[GH]H[HI]I[IJ]J$"},
|
||||
{"Medium", "[XYZ]ABCDEFGHIJKLMNOPQRSTUVWXYZ$"},
|
||||
{"Hard", "[ -~]*ABCDEFGHIJKLMNOPQRSTUVWXYZ$"},
|
||||
{"Hard1", "ABCD|CDEF|EFGH|GHIJ|IJKL|KLMN|MNOP|OPQR|QRST|STUV|UVWX|WXYZ"},
|
||||
}
|
||||
|
||||
func BenchmarkMatchEasy0_32(b *testing.B) { benchmark(b, easy0, 32<<0) }
|
||||
func BenchmarkMatchEasy0_1K(b *testing.B) { benchmark(b, easy0, 1<<10) }
|
||||
func BenchmarkMatchEasy0_32K(b *testing.B) { benchmark(b, easy0, 32<<10) }
|
||||
func BenchmarkMatchEasy0_1M(b *testing.B) { benchmark(b, easy0, 1<<20) }
|
||||
func BenchmarkMatchEasy0_32M(b *testing.B) { benchmark(b, easy0, 32<<20) }
|
||||
func BenchmarkMatchEasy1_32(b *testing.B) { benchmark(b, easy1, 32<<0) }
|
||||
func BenchmarkMatchEasy1_1K(b *testing.B) { benchmark(b, easy1, 1<<10) }
|
||||
func BenchmarkMatchEasy1_32K(b *testing.B) { benchmark(b, easy1, 32<<10) }
|
||||
func BenchmarkMatchEasy1_1M(b *testing.B) { benchmark(b, easy1, 1<<20) }
|
||||
func BenchmarkMatchEasy1_32M(b *testing.B) { benchmark(b, easy1, 32<<20) }
|
||||
func BenchmarkMatchMedium_32(b *testing.B) { benchmark(b, medium, 32<<0) }
|
||||
func BenchmarkMatchMedium_1K(b *testing.B) { benchmark(b, medium, 1<<10) }
|
||||
func BenchmarkMatchMedium_32K(b *testing.B) { benchmark(b, medium, 32<<10) }
|
||||
func BenchmarkMatchMedium_1M(b *testing.B) { benchmark(b, medium, 1<<20) }
|
||||
func BenchmarkMatchMedium_32M(b *testing.B) { benchmark(b, medium, 32<<20) }
|
||||
func BenchmarkMatchHard_32(b *testing.B) { benchmark(b, hard, 32<<0) }
|
||||
func BenchmarkMatchHard_1K(b *testing.B) { benchmark(b, hard, 1<<10) }
|
||||
func BenchmarkMatchHard_32K(b *testing.B) { benchmark(b, hard, 32<<10) }
|
||||
func BenchmarkMatchHard_1M(b *testing.B) { benchmark(b, hard, 1<<20) }
|
||||
func BenchmarkMatchHard_32M(b *testing.B) { benchmark(b, hard, 32<<20) }
|
||||
var benchSizes = []struct {
|
||||
name string
|
||||
n int
|
||||
}{
|
||||
{"32", 32},
|
||||
{"1K", 1 << 10},
|
||||
{"32K", 32 << 10},
|
||||
{"1M", 1 << 20},
|
||||
{"32M", 32 << 20},
|
||||
}
|
||||
|
||||
func TestLongest(t *testing.T) {
|
||||
re, err := Compile(`a(|b)`)
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
// Copyright 2014 The Go Authors. All rights reserved.
|
||||
// Copyright 2014 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
|
@ -33,7 +33,7 @@ type onePassInst struct {
|
|||
}
|
||||
|
||||
// OnePassPrefix returns a literal string that all matches for the
|
||||
// regexp must start with. Complete is true if the prefix
|
||||
// regexp must start with. Complete is true if the prefix
|
||||
// is the entire match. Pc is the index of the last rune instruction
|
||||
// in the string. The OnePassPrefix skips over the mandatory
|
||||
// EmptyBeginText
|
||||
|
@ -450,7 +450,7 @@ func makeOnePass(p *onePassProg) *onePassProg {
|
|||
for !instQueue.empty() {
|
||||
visitQueue.clear()
|
||||
pc := instQueue.next()
|
||||
if !check(uint32(pc), m) {
|
||||
if !check(pc, m) {
|
||||
p = notOnePass
|
||||
break
|
||||
}
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
// Copyright 2014 The Go Authors. All rights reserved.
|
||||
// Copyright 2014 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
|
@ -133,8 +133,6 @@ func TestMergeRuneSet(t *testing.T) {
|
|||
}
|
||||
}
|
||||
|
||||
const noStr = `!`
|
||||
|
||||
var onePass = &onePassProg{}
|
||||
|
||||
var onePassTests = []struct {
|
||||
|
|
|
@ -22,14 +22,14 @@
|
|||
// All characters are UTF-8-encoded code points.
|
||||
//
|
||||
// There are 16 methods of Regexp that match a regular expression and identify
|
||||
// the matched text. Their names are matched by this regular expression:
|
||||
// the matched text. Their names are matched by this regular expression:
|
||||
//
|
||||
// Find(All)?(String)?(Submatch)?(Index)?
|
||||
//
|
||||
// If 'All' is present, the routine matches successive non-overlapping
|
||||
// matches of the entire expression. Empty matches abutting a preceding
|
||||
// match are ignored. The return value is a slice containing the successive
|
||||
// return values of the corresponding non-'All' routine. These routines take
|
||||
// matches of the entire expression. Empty matches abutting a preceding
|
||||
// match are ignored. The return value is a slice containing the successive
|
||||
// return values of the corresponding non-'All' routine. These routines take
|
||||
// an extra integer argument, n; if n >= 0, the function returns at most n
|
||||
// matches/submatches.
|
||||
//
|
||||
|
@ -45,9 +45,9 @@
|
|||
//
|
||||
// If 'Index' is present, matches and submatches are identified by byte index
|
||||
// pairs within the input string: result[2*n:2*n+1] identifies the indexes of
|
||||
// the nth submatch. The pair for n==0 identifies the match of the entire
|
||||
// expression. If 'Index' is not present, the match is identified by the
|
||||
// text of the match/submatch. If an index is negative, it means that
|
||||
// the nth submatch. The pair for n==0 identifies the match of the entire
|
||||
// expression. If 'Index' is not present, the match is identified by the
|
||||
// text of the match/submatch. If an index is negative, it means that
|
||||
// subexpression did not match any string in the input.
|
||||
//
|
||||
// There is also a subset of the methods that can be applied to text read
|
||||
|
@ -55,7 +55,7 @@
|
|||
//
|
||||
// MatchReader, FindReaderIndex, FindReaderSubmatchIndex
|
||||
//
|
||||
// This set may grow. Note that regular expression matches may need to
|
||||
// This set may grow. Note that regular expression matches may need to
|
||||
// examine text beyond the text returned by a match, so the methods that
|
||||
// match text from a RuneReader may read arbitrarily far into the input
|
||||
// before returning.
|
||||
|
@ -75,12 +75,18 @@ import (
|
|||
"unicode/utf8"
|
||||
)
|
||||
|
||||
var debug = false
|
||||
|
||||
// Regexp is the representation of a compiled regular expression.
|
||||
// A Regexp is safe for concurrent use by multiple goroutines.
|
||||
type Regexp struct {
|
||||
// read-only after Compile
|
||||
regexpRO
|
||||
|
||||
// cache of machines for running regexp
|
||||
mu sync.Mutex
|
||||
machine []*machine
|
||||
}
|
||||
|
||||
type regexpRO struct {
|
||||
expr string // as passed to Compile
|
||||
prog *syntax.Prog // compiled program
|
||||
onepass *onePassProg // onepass program or nil
|
||||
|
@ -93,10 +99,6 @@ type Regexp struct {
|
|||
numSubexp int
|
||||
subexpNames []string
|
||||
longest bool
|
||||
|
||||
// cache of machines for running regexp
|
||||
mu sync.Mutex
|
||||
machine []*machine
|
||||
}
|
||||
|
||||
// String returns the source text used to compile the regular expression.
|
||||
|
@ -109,10 +111,11 @@ func (re *Regexp) String() string {
|
|||
// When using a Regexp in multiple goroutines, giving each goroutine
|
||||
// its own copy helps to avoid lock contention.
|
||||
func (re *Regexp) Copy() *Regexp {
|
||||
r := *re
|
||||
r.mu = sync.Mutex{}
|
||||
r.machine = nil
|
||||
return &r
|
||||
// It is not safe to copy Regexp by value
|
||||
// since it contains a sync.Mutex.
|
||||
return &Regexp{
|
||||
regexpRO: re.regexpRO,
|
||||
}
|
||||
}
|
||||
|
||||
// Compile parses a regular expression and returns, if successful,
|
||||
|
@ -174,13 +177,15 @@ func compile(expr string, mode syntax.Flags, longest bool) (*Regexp, error) {
|
|||
return nil, err
|
||||
}
|
||||
regexp := &Regexp{
|
||||
expr: expr,
|
||||
prog: prog,
|
||||
onepass: compileOnePass(prog),
|
||||
numSubexp: maxCap,
|
||||
subexpNames: capNames,
|
||||
cond: prog.StartCond(),
|
||||
longest: longest,
|
||||
regexpRO: regexpRO{
|
||||
expr: expr,
|
||||
prog: prog,
|
||||
onepass: compileOnePass(prog),
|
||||
numSubexp: maxCap,
|
||||
subexpNames: capNames,
|
||||
cond: prog.StartCond(),
|
||||
longest: longest,
|
||||
},
|
||||
}
|
||||
if regexp.onepass == notOnePass {
|
||||
regexp.prefix, regexp.prefixComplete = prog.Prefix()
|
||||
|
@ -258,10 +263,10 @@ func (re *Regexp) NumSubexp() int {
|
|||
}
|
||||
|
||||
// SubexpNames returns the names of the parenthesized subexpressions
|
||||
// in this Regexp. The name for the first sub-expression is names[1],
|
||||
// in this Regexp. The name for the first sub-expression is names[1],
|
||||
// so that if m is a match slice, the name for m[i] is SubexpNames()[i].
|
||||
// Since the Regexp as a whole cannot be named, names[0] is always
|
||||
// the empty string. The slice should not be modified.
|
||||
// the empty string. The slice should not be modified.
|
||||
func (re *Regexp) SubexpNames() []string {
|
||||
return re.subexpNames
|
||||
}
|
||||
|
@ -394,7 +399,7 @@ func (i *inputReader) context(pos int) syntax.EmptyOp {
|
|||
}
|
||||
|
||||
// LiteralPrefix returns a literal string that must begin any match
|
||||
// of the regular expression re. It returns the boolean true if the
|
||||
// of the regular expression re. It returns the boolean true if the
|
||||
// literal string comprises the entire regular expression.
|
||||
func (re *Regexp) LiteralPrefix() (prefix string, complete bool) {
|
||||
return re.prefix, re.prefixComplete
|
||||
|
@ -417,7 +422,7 @@ func (re *Regexp) Match(b []byte) bool {
|
|||
}
|
||||
|
||||
// MatchReader checks whether a textual regular expression matches the text
|
||||
// read by the RuneReader. More complicated queries need to use Compile and
|
||||
// read by the RuneReader. More complicated queries need to use Compile and
|
||||
// the full Regexp interface.
|
||||
func MatchReader(pattern string, r io.RuneReader) (matched bool, err error) {
|
||||
re, err := Compile(pattern)
|
||||
|
@ -428,7 +433,7 @@ func MatchReader(pattern string, r io.RuneReader) (matched bool, err error) {
|
|||
}
|
||||
|
||||
// MatchString checks whether a textual regular expression
|
||||
// matches a string. More complicated queries need
|
||||
// matches a string. More complicated queries need
|
||||
// to use Compile and the full Regexp interface.
|
||||
func MatchString(pattern string, s string) (matched bool, err error) {
|
||||
re, err := Compile(pattern)
|
||||
|
@ -439,7 +444,7 @@ func MatchString(pattern string, s string) (matched bool, err error) {
|
|||
}
|
||||
|
||||
// Match checks whether a textual regular expression
|
||||
// matches a byte slice. More complicated queries need
|
||||
// matches a byte slice. More complicated queries need
|
||||
// to use Compile and the full Regexp interface.
|
||||
func Match(pattern string, b []byte) (matched bool, err error) {
|
||||
re, err := Compile(pattern)
|
||||
|
@ -450,11 +455,11 @@ func Match(pattern string, b []byte) (matched bool, err error) {
|
|||
}
|
||||
|
||||
// ReplaceAllString returns a copy of src, replacing matches of the Regexp
|
||||
// with the replacement string repl. Inside repl, $ signs are interpreted as
|
||||
// with the replacement string repl. Inside repl, $ signs are interpreted as
|
||||
// in Expand, so for instance $1 represents the text of the first submatch.
|
||||
func (re *Regexp) ReplaceAllString(src, repl string) string {
|
||||
n := 2
|
||||
if strings.Index(repl, "$") >= 0 {
|
||||
if strings.Contains(repl, "$") {
|
||||
n = 2 * (re.numSubexp + 1)
|
||||
}
|
||||
b := re.replaceAll(nil, src, n, func(dst []byte, match []int) []byte {
|
||||
|
@ -464,7 +469,7 @@ func (re *Regexp) ReplaceAllString(src, repl string) string {
|
|||
}
|
||||
|
||||
// ReplaceAllLiteralString returns a copy of src, replacing matches of the Regexp
|
||||
// with the replacement string repl. The replacement repl is substituted directly,
|
||||
// with the replacement string repl. The replacement repl is substituted directly,
|
||||
// without using Expand.
|
||||
func (re *Regexp) ReplaceAllLiteralString(src, repl string) string {
|
||||
return string(re.replaceAll(nil, src, 2, func(dst []byte, match []int) []byte {
|
||||
|
@ -474,7 +479,7 @@ func (re *Regexp) ReplaceAllLiteralString(src, repl string) string {
|
|||
|
||||
// ReplaceAllStringFunc returns a copy of src in which all matches of the
|
||||
// Regexp have been replaced by the return value of function repl applied
|
||||
// to the matched substring. The replacement returned by repl is substituted
|
||||
// to the matched substring. The replacement returned by repl is substituted
|
||||
// directly, without using Expand.
|
||||
func (re *Regexp) ReplaceAllStringFunc(src string, repl func(string) string) string {
|
||||
b := re.replaceAll(nil, src, 2, func(dst []byte, match []int) []byte {
|
||||
|
@ -530,7 +535,7 @@ func (re *Regexp) replaceAll(bsrc []byte, src string, nmatch int, repl func(dst
|
|||
searchPos += width
|
||||
} else if searchPos+1 > a[1] {
|
||||
// This clause is only needed at the end of the input
|
||||
// string. In that case, DecodeRuneInString returns width=0.
|
||||
// string. In that case, DecodeRuneInString returns width=0.
|
||||
searchPos++
|
||||
} else {
|
||||
searchPos = a[1]
|
||||
|
@ -548,7 +553,7 @@ func (re *Regexp) replaceAll(bsrc []byte, src string, nmatch int, repl func(dst
|
|||
}
|
||||
|
||||
// ReplaceAll returns a copy of src, replacing matches of the Regexp
|
||||
// with the replacement text repl. Inside repl, $ signs are interpreted as
|
||||
// with the replacement text repl. Inside repl, $ signs are interpreted as
|
||||
// in Expand, so for instance $1 represents the text of the first submatch.
|
||||
func (re *Regexp) ReplaceAll(src, repl []byte) []byte {
|
||||
n := 2
|
||||
|
@ -566,7 +571,7 @@ func (re *Regexp) ReplaceAll(src, repl []byte) []byte {
|
|||
}
|
||||
|
||||
// ReplaceAllLiteral returns a copy of src, replacing matches of the Regexp
|
||||
// with the replacement bytes repl. The replacement repl is substituted directly,
|
||||
// with the replacement bytes repl. The replacement repl is substituted directly,
|
||||
// without using Expand.
|
||||
func (re *Regexp) ReplaceAllLiteral(src, repl []byte) []byte {
|
||||
return re.replaceAll(src, "", 2, func(dst []byte, match []int) []byte {
|
||||
|
@ -576,7 +581,7 @@ func (re *Regexp) ReplaceAllLiteral(src, repl []byte) []byte {
|
|||
|
||||
// ReplaceAllFunc returns a copy of src in which all matches of the
|
||||
// Regexp have been replaced by the return value of function repl applied
|
||||
// to the matched byte slice. The replacement returned by repl is substituted
|
||||
// to the matched byte slice. The replacement returned by repl is substituted
|
||||
// directly, without using Expand.
|
||||
func (re *Regexp) ReplaceAllFunc(src []byte, repl func([]byte) []byte) []byte {
|
||||
return re.replaceAll(src, "", 2, func(dst []byte, match []int) []byte {
|
||||
|
@ -592,7 +597,7 @@ func special(b byte) bool {
|
|||
|
||||
// QuoteMeta returns a string that quotes all regular expression metacharacters
|
||||
// inside the argument text; the returned string is a regular expression matching
|
||||
// the literal text. For example, QuoteMeta(`[foo]`) returns `\[foo\]`.
|
||||
// the literal text. For example, QuoteMeta(`[foo]`) returns `\[foo\]`.
|
||||
func QuoteMeta(s string) string {
|
||||
b := make([]byte, 2*len(s))
|
||||
|
||||
|
@ -684,7 +689,7 @@ func (re *Regexp) Find(b []byte) []byte {
|
|||
}
|
||||
|
||||
// FindIndex returns a two-element slice of integers defining the location of
|
||||
// the leftmost match in b of the regular expression. The match itself is at
|
||||
// the leftmost match in b of the regular expression. The match itself is at
|
||||
// b[loc[0]:loc[1]].
|
||||
// A return value of nil indicates no match.
|
||||
func (re *Regexp) FindIndex(b []byte) (loc []int) {
|
||||
|
@ -696,9 +701,9 @@ func (re *Regexp) FindIndex(b []byte) (loc []int) {
|
|||
}
|
||||
|
||||
// FindString returns a string holding the text of the leftmost match in s of the regular
|
||||
// expression. If there is no match, the return value is an empty string,
|
||||
// expression. If there is no match, the return value is an empty string,
|
||||
// but it will also be empty if the regular expression successfully matches
|
||||
// an empty string. Use FindStringIndex or FindStringSubmatch if it is
|
||||
// an empty string. Use FindStringIndex or FindStringSubmatch if it is
|
||||
// necessary to distinguish these cases.
|
||||
func (re *Regexp) FindString(s string) string {
|
||||
a := re.doExecute(nil, nil, s, 0, 2)
|
||||
|
@ -709,7 +714,7 @@ func (re *Regexp) FindString(s string) string {
|
|||
}
|
||||
|
||||
// FindStringIndex returns a two-element slice of integers defining the
|
||||
// location of the leftmost match in s of the regular expression. The match
|
||||
// location of the leftmost match in s of the regular expression. The match
|
||||
// itself is at s[loc[0]:loc[1]].
|
||||
// A return value of nil indicates no match.
|
||||
func (re *Regexp) FindStringIndex(s string) (loc []int) {
|
||||
|
@ -722,7 +727,7 @@ func (re *Regexp) FindStringIndex(s string) (loc []int) {
|
|||
|
||||
// FindReaderIndex returns a two-element slice of integers defining the
|
||||
// location of the leftmost match of the regular expression in text read from
|
||||
// the RuneReader. The match text was found in the input stream at
|
||||
// the RuneReader. The match text was found in the input stream at
|
||||
// byte offset loc[0] through loc[1]-1.
|
||||
// A return value of nil indicates no match.
|
||||
func (re *Regexp) FindReaderIndex(r io.RuneReader) (loc []int) {
|
||||
|
@ -754,14 +759,14 @@ func (re *Regexp) FindSubmatch(b []byte) [][]byte {
|
|||
|
||||
// Expand appends template to dst and returns the result; during the
|
||||
// append, Expand replaces variables in the template with corresponding
|
||||
// matches drawn from src. The match slice should have been returned by
|
||||
// matches drawn from src. The match slice should have been returned by
|
||||
// FindSubmatchIndex.
|
||||
//
|
||||
// In the template, a variable is denoted by a substring of the form
|
||||
// $name or ${name}, where name is a non-empty sequence of letters,
|
||||
// digits, and underscores. A purely numeric name like $1 refers to
|
||||
// digits, and underscores. A purely numeric name like $1 refers to
|
||||
// the submatch with the corresponding index; other names refer to
|
||||
// capturing parentheses named with the (?P<name>...) syntax. A
|
||||
// capturing parentheses named with the (?P<name>...) syntax. A
|
||||
// reference to an out of range or unmatched index or a name that is not
|
||||
// present in the regular expression is replaced with an empty slice.
|
||||
//
|
||||
|
@ -920,7 +925,7 @@ func (re *Regexp) FindStringSubmatchIndex(s string) []int {
|
|||
// FindReaderSubmatchIndex returns a slice holding the index pairs
|
||||
// identifying the leftmost match of the regular expression of text read by
|
||||
// the RuneReader, and the matches, if any, of its subexpressions, as defined
|
||||
// by the 'Submatch' and 'Index' descriptions in the package comment. A
|
||||
// by the 'Submatch' and 'Index' descriptions in the package comment. A
|
||||
// return value of nil indicates no match.
|
||||
func (re *Regexp) FindReaderSubmatchIndex(r io.RuneReader) []int {
|
||||
return re.pad(re.doExecute(r, nil, "", 0, re.prog.NumCap))
|
||||
|
|
|
@ -8,11 +8,11 @@ import "unicode"
|
|||
|
||||
// A patchList is a list of instruction pointers that need to be filled in (patched).
|
||||
// Because the pointers haven't been filled in yet, we can reuse their storage
|
||||
// to hold the list. It's kind of sleazy, but works well in practice.
|
||||
// to hold the list. It's kind of sleazy, but works well in practice.
|
||||
// See http://swtch.com/~rsc/regexp/regexp1.html for inspiration.
|
||||
//
|
||||
// These aren't really pointers: they're integers, so we can reinterpret them
|
||||
// this way without using package unsafe. A value l denotes
|
||||
// this way without using package unsafe. A value l denotes
|
||||
// p.inst[l>>1].Out (l&1==0) or .Arg (l&1==1).
|
||||
// l == 0 denotes the empty list, okay because we start every program
|
||||
// with a fail instruction, so we'll never want to point at its output link.
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
// Copyright 2012 The Go Authors. All rights reserved.
|
||||
// Copyright 2012 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
|
@ -66,7 +66,7 @@ Grouping:
|
|||
|
||||
Empty strings:
|
||||
^ at beginning of text or line (flag m=true)
|
||||
$ at end of text (like \z not \Z) or line (flag m=true)
|
||||
$ at end of text (like \z not Perl's \Z) or line (flag m=true)
|
||||
\A at beginning of text
|
||||
\b at ASCII word boundary (\w on one side and \W, \A, or \z on the other)
|
||||
\B not at ASCII word boundary
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
// Copyright 2011 The Go Authors. All rights reserved.
|
||||
// Copyright 2011 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
|
@ -141,9 +141,9 @@ func (p *parser) push(re *Regexp) *Regexp {
|
|||
}
|
||||
|
||||
// maybeConcat implements incremental concatenation
|
||||
// of literal runes into string nodes. The parser calls this
|
||||
// of literal runes into string nodes. The parser calls this
|
||||
// before each push, so only the top fragment of the stack
|
||||
// might need processing. Since this is called before a push,
|
||||
// might need processing. Since this is called before a push,
|
||||
// the topmost literal is no longer subject to operators like *
|
||||
// (Otherwise ab* would turn into (ab)*.)
|
||||
// If r >= 0 and there's a node left over, maybeConcat uses it
|
||||
|
@ -600,7 +600,7 @@ func (p *parser) leadingString(re *Regexp) ([]rune, Flags) {
|
|||
}
|
||||
|
||||
// removeLeadingString removes the first n leading runes
|
||||
// from the beginning of re. It returns the replacement for re.
|
||||
// from the beginning of re. It returns the replacement for re.
|
||||
func (p *parser) removeLeadingString(re *Regexp, n int) *Regexp {
|
||||
if re.Op == OpConcat && len(re.Sub) > 0 {
|
||||
// Removing a leading string in a concatenation
|
||||
|
@ -957,11 +957,11 @@ func (p *parser) parsePerlFlags(s string) (rest string, err error) {
|
|||
// Perl 5.10 gave in and implemented the Python version too,
|
||||
// but they claim that the last two are the preferred forms.
|
||||
// PCRE and languages based on it (specifically, PHP and Ruby)
|
||||
// support all three as well. EcmaScript 4 uses only the Python form.
|
||||
// support all three as well. EcmaScript 4 uses only the Python form.
|
||||
//
|
||||
// In both the open source world (via Code Search) and the
|
||||
// Google source tree, (?P<expr>name) is the dominant form,
|
||||
// so that's the one we implement. One is enough.
|
||||
// so that's the one we implement. One is enough.
|
||||
if len(t) > 4 && t[2] == 'P' && t[3] == '<' {
|
||||
// Pull out name.
|
||||
end := strings.IndexRune(t, '>')
|
||||
|
@ -989,7 +989,7 @@ func (p *parser) parsePerlFlags(s string) (rest string, err error) {
|
|||
return t[end+1:], nil
|
||||
}
|
||||
|
||||
// Non-capturing group. Might also twiddle Perl flags.
|
||||
// Non-capturing group. Might also twiddle Perl flags.
|
||||
var c rune
|
||||
t = t[2:] // skip (?
|
||||
flags := p.flags
|
||||
|
@ -1257,7 +1257,7 @@ Switch:
|
|||
if c < utf8.RuneSelf && !isalnum(c) {
|
||||
// Escaped non-word characters are always themselves.
|
||||
// PCRE is not quite so rigorous: it accepts things like
|
||||
// \q, but we don't. We once rejected \_, but too many
|
||||
// \q, but we don't. We once rejected \_, but too many
|
||||
// programs and people insist on using it, so allow \_.
|
||||
return c, t, nil
|
||||
}
|
||||
|
@ -1292,7 +1292,7 @@ Switch:
|
|||
if c == '{' {
|
||||
// Any number of digits in braces.
|
||||
// Perl accepts any text at all; it ignores all text
|
||||
// after the first non-hex digit. We require only hex digits,
|
||||
// after the first non-hex digit. We require only hex digits,
|
||||
// and at least one.
|
||||
nhex := 0
|
||||
r = 0
|
||||
|
@ -1333,10 +1333,10 @@ Switch:
|
|||
}
|
||||
return x*16 + y, t, nil
|
||||
|
||||
// C escapes. There is no case 'b', to avoid misparsing
|
||||
// C escapes. There is no case 'b', to avoid misparsing
|
||||
// the Perl word-boundary \b as the C backspace \b
|
||||
// when in POSIX mode. In Perl, /\b/ means word-boundary
|
||||
// but /[\b]/ means backspace. We don't support that.
|
||||
// when in POSIX mode. In Perl, /\b/ means word-boundary
|
||||
// but /[\b]/ means backspace. We don't support that.
|
||||
// If you want a backspace, embed a literal backspace
|
||||
// character or use \x08.
|
||||
case 'a':
|
||||
|
@ -1377,7 +1377,7 @@ type charGroup struct {
|
|||
}
|
||||
|
||||
// parsePerlClassEscape parses a leading Perl character class escape like \d
|
||||
// from the beginning of s. If one is present, it appends the characters to r
|
||||
// from the beginning of s. If one is present, it appends the characters to r
|
||||
// and returns the new slice r and the remainder of the string.
|
||||
func (p *parser) parsePerlClassEscape(s string, r []rune) (out []rune, rest string) {
|
||||
if p.flags&PerlX == 0 || len(s) < 2 || s[0] != '\\' {
|
||||
|
@ -1391,7 +1391,7 @@ func (p *parser) parsePerlClassEscape(s string, r []rune) (out []rune, rest stri
|
|||
}
|
||||
|
||||
// parseNamedClass parses a leading POSIX named character class like [:alnum:]
|
||||
// from the beginning of s. If one is present, it appends the characters to r
|
||||
// from the beginning of s. If one is present, it appends the characters to r
|
||||
// and returns the new slice r and the remainder of the string.
|
||||
func (p *parser) parseNamedClass(s string, r []rune) (out []rune, rest string, err error) {
|
||||
if len(s) < 2 || s[0] != '[' || s[1] != ':' {
|
||||
|
@ -1454,7 +1454,7 @@ func unicodeTable(name string) (*unicode.RangeTable, *unicode.RangeTable) {
|
|||
}
|
||||
|
||||
// parseUnicodeClass parses a leading Unicode character class like \p{Han}
|
||||
// from the beginning of s. If one is present, it appends the characters to r
|
||||
// from the beginning of s. If one is present, it appends the characters to r
|
||||
// and returns the new slice r and the remainder of the string.
|
||||
func (p *parser) parseUnicodeClass(s string, r []rune) (out []rune, rest string, err error) {
|
||||
if p.flags&UnicodeGroups == 0 || len(s) < 2 || s[0] != '\\' || s[1] != 'p' && s[1] != 'P' {
|
||||
|
@ -1692,7 +1692,7 @@ const (
|
|||
// minimum and maximum runes involved in folding.
|
||||
// checked during test.
|
||||
minFold = 0x0041
|
||||
maxFold = 0x118df
|
||||
maxFold = 0x1e943
|
||||
)
|
||||
|
||||
// appendFoldedRange returns the result of appending the range lo-hi
|
||||
|
@ -1718,7 +1718,7 @@ func appendFoldedRange(r []rune, lo, hi rune) []rune {
|
|||
hi = maxFold
|
||||
}
|
||||
|
||||
// Brute force. Depend on appendRange to coalesce ranges on the fly.
|
||||
// Brute force. Depend on appendRange to coalesce ranges on the fly.
|
||||
for c := lo; c <= hi; c++ {
|
||||
r = appendRange(r, c, c)
|
||||
f := unicode.SimpleFold(c)
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
// Copyright 2011 The Go Authors. All rights reserved.
|
||||
// Copyright 2011 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
|
|
|
@ -144,7 +144,7 @@ func (i *Inst) op() InstOp {
|
|||
}
|
||||
|
||||
// Prefix returns a literal string that all matches for the
|
||||
// regexp must start with. Complete is true if the prefix
|
||||
// regexp must start with. Complete is true if the prefix
|
||||
// is the entire match.
|
||||
func (p *Prog) Prefix() (prefix string, complete bool) {
|
||||
i, _ := p.skipNop(uint32(p.Start))
|
||||
|
@ -164,7 +164,7 @@ func (p *Prog) Prefix() (prefix string, complete bool) {
|
|||
}
|
||||
|
||||
// StartCond returns the leading empty-width conditions that must
|
||||
// be true in any match. It returns ^EmptyOp(0) if no matches are possible.
|
||||
// be true in any match. It returns ^EmptyOp(0) if no matches are possible.
|
||||
func (p *Prog) StartCond() EmptyOp {
|
||||
var flag EmptyOp
|
||||
pc := uint32(p.Start)
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
// Copyright 2011 The Go Authors. All rights reserved.
|
||||
// Copyright 2011 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
|
@ -139,7 +139,7 @@ func writeRegexp(b *bytes.Buffer, re *Regexp) {
|
|||
if len(re.Rune) == 0 {
|
||||
b.WriteString(`^\x00-\x{10FFFF}`)
|
||||
} else if re.Rune[0] == 0 && re.Rune[len(re.Rune)-1] == unicode.MaxRune {
|
||||
// Contains 0 and MaxRune. Probably a negated class.
|
||||
// Contains 0 and MaxRune. Probably a negated class.
|
||||
// Print the gaps.
|
||||
b.WriteRune('^')
|
||||
for i := 1; i < len(re.Rune)-1; i += 2 {
|
||||
|
@ -252,7 +252,7 @@ const meta = `\.+*?()|[]{}^$`
|
|||
|
||||
func escape(b *bytes.Buffer, r rune, force bool) {
|
||||
if unicode.IsPrint(r) {
|
||||
if strings.IndexRune(meta, r) >= 0 || force {
|
||||
if strings.ContainsRune(meta, r) || force {
|
||||
b.WriteRune('\\')
|
||||
}
|
||||
b.WriteRune(r)
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
// Copyright 2011 The Go Authors. All rights reserved.
|
||||
// Copyright 2011 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
|
@ -8,7 +8,7 @@ package syntax
|
|||
// and with various other simplifications, such as rewriting /(?:a+)+/ to /a+/.
|
||||
// The resulting regexp will execute correctly but its string representation
|
||||
// will not produce the same parse tree, because capturing parentheses
|
||||
// may have been duplicated or removed. For example, the simplified form
|
||||
// may have been duplicated or removed. For example, the simplified form
|
||||
// for /(x){1,2}/ is /(x)(x)?/ but both parentheses capture as $1.
|
||||
// The returned regexp may share structure with or be the original.
|
||||
func (re *Regexp) Simplify() *Regexp {
|
||||
|
@ -117,13 +117,13 @@ func (re *Regexp) Simplify() *Regexp {
|
|||
}
|
||||
|
||||
// simplify1 implements Simplify for the unary OpStar,
|
||||
// OpPlus, and OpQuest operators. It returns the simple regexp
|
||||
// OpPlus, and OpQuest operators. It returns the simple regexp
|
||||
// equivalent to
|
||||
//
|
||||
// Regexp{Op: op, Flags: flags, Sub: {sub}}
|
||||
//
|
||||
// under the assumption that sub is already simple, and
|
||||
// without first allocating that structure. If the regexp
|
||||
// without first allocating that structure. If the regexp
|
||||
// to be returned turns out to be equivalent to re, simplify1
|
||||
// returns re instead.
|
||||
//
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
// Copyright 2011 The Go Authors. All rights reserved.
|
||||
// Copyright 2011 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
|
@ -59,7 +59,7 @@ var simplifyTests = []struct {
|
|||
{`a{0,1}`, `a?`},
|
||||
// The next three are illegible because Simplify inserts (?:)
|
||||
// parens instead of () parens to avoid creating extra
|
||||
// captured subexpressions. The comments show a version with fewer parens.
|
||||
// captured subexpressions. The comments show a version with fewer parens.
|
||||
{`(a){0,2}`, `(?:(a)(a)?)?`}, // (aa?)?
|
||||
{`(a){0,4}`, `(?:(a)(?:(a)(?:(a)(a)?)?)?)?`}, // (a(a(aa?)?)?)?
|
||||
{`(a){2,6}`, `(a)(a)(?:(a)(?:(a)(?:(a)(a)?)?)?)?`}, // aa(a(a(aa?)?)?)?
|
||||
|
@ -117,7 +117,7 @@ var simplifyTests = []struct {
|
|||
// Empty string as a regular expression.
|
||||
// The empty string must be preserved inside parens in order
|
||||
// to make submatches work right, so these tests are less
|
||||
// interesting than they might otherwise be. String inserts
|
||||
// interesting than they might otherwise be. String inserts
|
||||
// explicit (?:) in place of non-parenthesized empty strings,
|
||||
// to make them easier to spot for other parsers.
|
||||
{`(a|b|)`, `([a-b]|(?:))`},
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue