Update Go library to r60.
From-SVN: r178910
This commit is contained in:
parent
5548ca3540
commit
adb0401dac
718 changed files with 58911 additions and 30469 deletions
284
libgo/go/exp/regexp/syntax/regexp.go
Normal file
284
libgo/go/exp/regexp/syntax/regexp.go
Normal file
|
@ -0,0 +1,284 @@
|
|||
// Copyright 2011 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// Package syntax parses regular expressions into syntax trees.
|
||||
// WORK IN PROGRESS.
|
||||
package syntax
|
||||
|
||||
// Note to implementers:
|
||||
// In this package, re is always a *Regexp and r is always a rune.
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"strconv"
|
||||
"strings"
|
||||
"unicode"
|
||||
)
|
||||
|
||||
// A Regexp is a node in a regular expression syntax tree.
|
||||
type Regexp struct {
|
||||
Op Op // operator
|
||||
Flags Flags
|
||||
Sub []*Regexp // subexpressions, if any
|
||||
Sub0 [1]*Regexp // storage for short Sub
|
||||
Rune []int // matched runes, for OpLiteral, OpCharClass
|
||||
Rune0 [2]int // storage for short Rune
|
||||
Min, Max int // min, max for OpRepeat
|
||||
Cap int // capturing index, for OpCapture
|
||||
Name string // capturing name, for OpCapture
|
||||
}
|
||||
|
||||
// An Op is a single regular expression operator.
|
||||
type Op uint8
|
||||
|
||||
// Operators are listed in precedence order, tightest binding to weakest.
|
||||
// Character class operators are listed simplest to most complex
|
||||
// (OpLiteral, OpCharClass, OpAnyCharNotNL, OpAnyChar).
|
||||
|
||||
const (
|
||||
OpNoMatch Op = 1 + iota // matches no strings
|
||||
OpEmptyMatch // matches empty string
|
||||
OpLiteral // matches Runes sequence
|
||||
OpCharClass // matches Runes interpreted as range pair list
|
||||
OpAnyCharNotNL // matches any character
|
||||
OpAnyChar // matches any character
|
||||
OpBeginLine // matches empty string at beginning of line
|
||||
OpEndLine // matches empty string at end of line
|
||||
OpBeginText // matches empty string at beginning of text
|
||||
OpEndText // matches empty string at end of text
|
||||
OpWordBoundary // matches word boundary `\b`
|
||||
OpNoWordBoundary // matches word non-boundary `\B`
|
||||
OpCapture // capturing subexpression with index Cap, optional name Name
|
||||
OpStar // matches Sub[0] zero or more times
|
||||
OpPlus // matches Sub[0] one or more times
|
||||
OpQuest // matches Sub[0] zero or one times
|
||||
OpRepeat // matches Sub[0] at least Min times, at most Max (Max == -1 is no limit)
|
||||
OpConcat // matches concatenation of Subs
|
||||
OpAlternate // matches alternation of Subs
|
||||
)
|
||||
|
||||
const opPseudo Op = 128 // where pseudo-ops start
|
||||
|
||||
// Equal returns true if x and y have identical structure.
|
||||
func (x *Regexp) Equal(y *Regexp) bool {
|
||||
if x == nil || y == nil {
|
||||
return x == y
|
||||
}
|
||||
if x.Op != y.Op {
|
||||
return false
|
||||
}
|
||||
switch x.Op {
|
||||
case OpEndText:
|
||||
// The parse flags remember whether this is \z or \Z.
|
||||
if x.Flags&WasDollar != y.Flags&WasDollar {
|
||||
return false
|
||||
}
|
||||
|
||||
case OpLiteral, OpCharClass:
|
||||
if len(x.Rune) != len(y.Rune) {
|
||||
return false
|
||||
}
|
||||
for i, r := range x.Rune {
|
||||
if r != y.Rune[i] {
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
case OpAlternate, OpConcat:
|
||||
if len(x.Sub) != len(y.Sub) {
|
||||
return false
|
||||
}
|
||||
for i, sub := range x.Sub {
|
||||
if !sub.Equal(y.Sub[i]) {
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
case OpStar, OpPlus, OpQuest:
|
||||
if x.Flags&NonGreedy != y.Flags&NonGreedy || !x.Sub[0].Equal(y.Sub[0]) {
|
||||
return false
|
||||
}
|
||||
|
||||
case OpRepeat:
|
||||
if x.Flags&NonGreedy != y.Flags&NonGreedy || x.Min != y.Min || x.Max != y.Max || !x.Sub[0].Equal(y.Sub[0]) {
|
||||
return false
|
||||
}
|
||||
|
||||
case OpCapture:
|
||||
if x.Cap != y.Cap || x.Name != y.Name || !x.Sub[0].Equal(y.Sub[0]) {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// writeRegexp writes the Perl syntax for the regular expression re to b.
|
||||
func writeRegexp(b *bytes.Buffer, re *Regexp) {
|
||||
switch re.Op {
|
||||
default:
|
||||
b.WriteString("<invalid op" + strconv.Itoa(int(re.Op)) + ">")
|
||||
case OpNoMatch:
|
||||
b.WriteString(`[^\x00-\x{10FFFF}]`)
|
||||
case OpEmptyMatch:
|
||||
b.WriteString(`(?:)`)
|
||||
case OpLiteral:
|
||||
if re.Flags&FoldCase != 0 {
|
||||
b.WriteString(`(?i:`)
|
||||
}
|
||||
for _, r := range re.Rune {
|
||||
escape(b, r, false)
|
||||
}
|
||||
if re.Flags&FoldCase != 0 {
|
||||
b.WriteString(`)`)
|
||||
}
|
||||
case OpCharClass:
|
||||
if len(re.Rune)%2 != 0 {
|
||||
b.WriteString(`[invalid char class]`)
|
||||
break
|
||||
}
|
||||
b.WriteRune('[')
|
||||
if len(re.Rune) == 0 {
|
||||
b.WriteString(`^\x00-\x{10FFFF}`)
|
||||
} else if re.Rune[0] == 0 && re.Rune[len(re.Rune)-1] == unicode.MaxRune {
|
||||
// Contains 0 and MaxRune. Probably a negated class.
|
||||
// Print the gaps.
|
||||
b.WriteRune('^')
|
||||
for i := 1; i < len(re.Rune)-1; i += 2 {
|
||||
lo, hi := re.Rune[i]+1, re.Rune[i+1]-1
|
||||
escape(b, lo, lo == '-')
|
||||
if lo != hi {
|
||||
b.WriteRune('-')
|
||||
escape(b, hi, hi == '-')
|
||||
}
|
||||
}
|
||||
} else {
|
||||
for i := 0; i < len(re.Rune); i += 2 {
|
||||
lo, hi := re.Rune[i], re.Rune[i+1]
|
||||
escape(b, lo, lo == '-')
|
||||
if lo != hi {
|
||||
b.WriteRune('-')
|
||||
escape(b, hi, hi == '-')
|
||||
}
|
||||
}
|
||||
}
|
||||
b.WriteRune(']')
|
||||
case OpAnyCharNotNL:
|
||||
b.WriteString(`[^\n]`)
|
||||
case OpAnyChar:
|
||||
b.WriteRune('.')
|
||||
case OpBeginLine:
|
||||
b.WriteRune('^')
|
||||
case OpEndLine:
|
||||
b.WriteRune('$')
|
||||
case OpBeginText:
|
||||
b.WriteString(`\A`)
|
||||
case OpEndText:
|
||||
b.WriteString(`\z`)
|
||||
case OpWordBoundary:
|
||||
b.WriteString(`\b`)
|
||||
case OpNoWordBoundary:
|
||||
b.WriteString(`\B`)
|
||||
case OpCapture:
|
||||
if re.Name != "" {
|
||||
b.WriteString(`(?P<`)
|
||||
b.WriteString(re.Name)
|
||||
b.WriteRune('>')
|
||||
} else {
|
||||
b.WriteRune('(')
|
||||
}
|
||||
if re.Sub[0].Op != OpEmptyMatch {
|
||||
writeRegexp(b, re.Sub[0])
|
||||
}
|
||||
b.WriteRune(')')
|
||||
case OpStar, OpPlus, OpQuest, OpRepeat:
|
||||
if sub := re.Sub[0]; sub.Op > OpCapture {
|
||||
b.WriteString(`(?:`)
|
||||
writeRegexp(b, sub)
|
||||
b.WriteString(`)`)
|
||||
} else {
|
||||
writeRegexp(b, sub)
|
||||
}
|
||||
switch re.Op {
|
||||
case OpStar:
|
||||
b.WriteRune('*')
|
||||
case OpPlus:
|
||||
b.WriteRune('+')
|
||||
case OpQuest:
|
||||
b.WriteRune('?')
|
||||
case OpRepeat:
|
||||
b.WriteRune('{')
|
||||
b.WriteString(strconv.Itoa(re.Min))
|
||||
if re.Max != re.Min {
|
||||
b.WriteRune(',')
|
||||
if re.Max >= 0 {
|
||||
b.WriteString(strconv.Itoa(re.Max))
|
||||
}
|
||||
}
|
||||
b.WriteRune('}')
|
||||
}
|
||||
case OpConcat:
|
||||
for _, sub := range re.Sub {
|
||||
if sub.Op == OpAlternate {
|
||||
b.WriteString(`(?:`)
|
||||
writeRegexp(b, sub)
|
||||
b.WriteString(`)`)
|
||||
} else {
|
||||
writeRegexp(b, sub)
|
||||
}
|
||||
}
|
||||
case OpAlternate:
|
||||
for i, sub := range re.Sub {
|
||||
if i > 0 {
|
||||
b.WriteRune('|')
|
||||
}
|
||||
writeRegexp(b, sub)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (re *Regexp) String() string {
|
||||
var b bytes.Buffer
|
||||
writeRegexp(&b, re)
|
||||
return b.String()
|
||||
}
|
||||
|
||||
const meta = `\.+*?()|[]{}^$`
|
||||
|
||||
func escape(b *bytes.Buffer, r int, force bool) {
|
||||
if unicode.IsPrint(r) {
|
||||
if strings.IndexRune(meta, r) >= 0 || force {
|
||||
b.WriteRune('\\')
|
||||
}
|
||||
b.WriteRune(r)
|
||||
return
|
||||
}
|
||||
|
||||
switch r {
|
||||
case '\a':
|
||||
b.WriteString(`\a`)
|
||||
case '\f':
|
||||
b.WriteString(`\f`)
|
||||
case '\n':
|
||||
b.WriteString(`\n`)
|
||||
case '\r':
|
||||
b.WriteString(`\r`)
|
||||
case '\t':
|
||||
b.WriteString(`\t`)
|
||||
case '\v':
|
||||
b.WriteString(`\v`)
|
||||
default:
|
||||
if r < 0x100 {
|
||||
b.WriteString(`\x`)
|
||||
s := strconv.Itob(r, 16)
|
||||
if len(s) == 1 {
|
||||
b.WriteRune('0')
|
||||
}
|
||||
b.WriteString(s)
|
||||
break
|
||||
}
|
||||
b.WriteString(`\x{`)
|
||||
b.WriteString(strconv.Itob(r, 16))
|
||||
b.WriteString(`}`)
|
||||
}
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue