diff options
author | clsr <clsr@clsr.net> | 2017-08-18 13:45:49 +0200 |
---|---|---|
committer | clsr <clsr@clsr.net> | 2017-08-18 13:45:49 +0200 |
commit | 26248678aafc2f8e277d4bdafc116f2b349b02c5 (patch) | |
tree | 15f82488edb8c05aae756443284731875f36737c /simpletext.go | |
download | cnm-go-26248678aafc2f8e277d4bdafc116f2b349b02c5.tar.gz cnm-go-26248678aafc2f8e277d4bdafc116f2b349b02c5.zip |
Initial commit v0.1.0
Diffstat (limited to 'simpletext.go')
-rw-r--r-- | simpletext.go | 196 |
1 files changed, 196 insertions, 0 deletions
// simpletext.go (package cnm), added as a new file by the initial commit.
// The definitions below use the standard packages bytes, regexp, strconv and
// strings.

// ParseSimpleText parses raw as simple text: it first collapses unescaped
// whitespace with CollapseWhitespace and then resolves escape sequences with
// Unescape.
func ParseSimpleText(raw string) string {
	return Unescape(CollapseWhitespace(raw))
}

// CollapseWhitespace collapses sequences of non-escaped whitespace in raw CNM
// simple text into single spaces.
//
// Leading and trailing unescaped whitespace is dropped. Escape sequences are
// copied through verbatim (they are not resolved here), so an escaped space
// ("\ ") is never collapsed. A lone trailing backslash is preserved.
func CollapseWhitespace(raw string) string {
	// Turn every run of tabs, line feeds, form feeds and carriage returns
	// into one space. This split does not look at backslashes, so only the
	// space character needs escape-aware handling below.
	s := strings.Join(strings.FieldsFunc(raw, func(r rune) bool {
		switch r {
		case '\t', '\n', '\f', '\r':
			return true
		}
		return false
	}), " ")

	var buf bytes.Buffer
	first := true   // nothing has been emitted yet; suppresses leading spaces
	escape := false // previous rune was an unconsumed backslash
	space := false  // an unescaped space run is pending
	for _, r := range s {
		switch r {
		case '\\':
			if escape {
				// Escaped backslash: emit the pair unchanged.
				buf.WriteString("\\\\")
				escape = false
			} else {
				// Hold the backslash until the next rune is known.
				escape = true
			}
			if space && !first {
				buf.WriteByte(' ')
			}
			space = false
			first = false
		case ' ':
			if escape {
				// Escaped space is significant; keep it as written.
				buf.WriteString("\\ ")
				escape = false
			} else {
				// Only remember the run; it is flushed as a single
				// space before the next non-space rune, which also
				// drops trailing whitespace.
				space = true
			}
		default:
			if escape {
				buf.WriteByte('\\')
			}
			if space && !first {
				buf.WriteByte(' ')
			}
			buf.WriteRune(r)
			escape = false
			space = false
			first = false
		}
	}
	if escape {
		// Input ended with a dangling backslash.
		buf.WriteByte('\\')
	}
	return buf.String()
}

// doubleSpace is a run of two spaces. It is spelled with escapes so that the
// literal survives tools that collapse consecutive whitespace.
const doubleSpace = "\x20\x20"

// escapeToken maps one match of escapeRe, escapeSpaceRe or escapeNonspaceRe
// to its simple-text escape sequence. Each pattern only produces the subset
// of matches it is meant to escape.
func escapeToken(match string) string {
	switch match {
	case "\t":
		return `\t`
	case "\n":
		return `\n`
	case "\f":
		return `\f`
	case "\r":
		return `\r`
	case "\\":
		return `\\`
	case "\x00":
		return `\x00`
	case " ":
		// Leading or trailing space.
		return `\ `
	case doubleSpace:
		// Keep the first space, escape the second, so that collapsing
		// whitespace cannot merge the pair.
		return ` \ `
	}
	return match // this shouldn't happen
}

// escapeRe matches tab, LF, FF, CR, backslash, NUL, a leading space, a
// trailing space, or a pair of spaces (tried in that order).
var escapeRe = regexp.MustCompile(`[\t\n\f\r\\\x00]|^ | $| {2}`)

// Escape escapes whitespace, backslash and U+0000 within s.
//
// Tabs, line feeds, form feeds, carriage returns, backslashes and U+0000 are
// always escaped. A space is escaped only where CollapseWhitespace would
// otherwise drop or merge it: at the start of s, at the end of s, or as the
// second space of a two-space run.
func Escape(s string) string {
	return escapeRe.ReplaceAllStringFunc(s, escapeToken)
}

// escapeSpaceRe matches tab, LF, a leading space, a trailing space, or a pair
// of spaces (tried in that order).
var escapeSpaceRe = regexp.MustCompile(`[\t\n]|^ | $| {2}`)

// EscapeSpace works like Escape, except it only escapes spaces, tabs and line
// feeds.
func EscapeSpace(s string) string {
	return escapeSpaceRe.ReplaceAllStringFunc(s, escapeToken)
}

// escapeNonspaceRe matches FF, CR, backslash and NUL.
var escapeNonspaceRe = regexp.MustCompile(`[\f\r\\\x00]`)

// EscapeNonspace works like Escape, except it does not escape spaces, tabs and
// line feeds.
func EscapeNonspace(s string) string {
	return escapeNonspaceRe.ReplaceAllStringFunc(s, escapeToken)
}

// unescapeRe matches one complete escape sequence: a backslash followed by a
// single-character escape, or by x/u/U and exactly 2/4/8 hex digits.
var unescapeRe = regexp.MustCompile(`\\(?:[btnvfr \\]|x[0-9a-fA-F]{2}|u[0-9a-fA-F]{4}|U[0-9a-fA-F]{8})`)

// Unescape resolves escape sequences in simple text.
//
// Recognized sequences are \b, \t, \n, \v, \f, \r, "\ " (space), \\, \xNN,
// \uNNNN and \UNNNNNNNN. The numeric forms produce the UTF-8 encoding of the
// code point with that value (so \xNN is U+00NN, not a raw byte); values that
// are not valid code points become U+FFFD. Anything else, including a
// backslash followed by an unrecognized character, is left untouched.
func Unescape(s string) string {
	return unescapeRe.ReplaceAllStringFunc(s, func(match string) string {
		switch match[1] {
		case 'b':
			return "\b"
		case 't':
			return "\t"
		case 'n':
			return "\n"
		case 'v':
			return "\v"
		case 'f':
			return "\f"
		case 'r':
			return "\r"
		case ' ':
			return " "
		case '\\':
			return "\\"
		case 'x', 'u', 'U':
			// The pattern guarantees 2, 4 or 8 hex digits, which always
			// fit in 32 bits, so the error branch is purely defensive.
			n, err := strconv.ParseUint(match[2:], 16, 32)
			if err != nil {
				return match
			}
			return string(rune(n))
		}
		return match // this shouldn't happen
	})
}

// splitRe matches one whitespace-delimited token, where a backslash keeps the
// following character (if any) inside the token. The "|^" alternative lets
// the pattern match the empty string at the very start of the input.
var splitRe = regexp.MustCompile(`((?:[^\t\n\f\r\\ ]|\\.?)+|^)`)

// SplitUnescape splits the string s by whitespace, then unescapes simple text
// escape sequences in each piece.
//
// Escaped whitespace (for example "\ ") does not split. Because splitRe can
// match the empty string at the start of s, an empty s, or an s that begins
// with whitespace, yields an empty first element.
func SplitUnescape(s string) []string {
	ss := splitRe.FindAllString(s, -1)
	for i := range ss {
		ss[i] = Unescape(ss[i])
	}
	return ss
}

// JoinEscape escapes each argument using simple text escape sequences and then
// joins them with spaces. Empty arguments are skipped.
//
// NOTE(review): Escape leaves single interior spaces unescaped, so an element
// such as "a b" will be split in two by SplitUnescape — confirm whether that
// is intended.
func JoinEscape(ss []string) string {
	l := make([]string, 0, len(ss))
	for _, s := range ss {
		if s != "" {
			l = append(l, Escape(s))
		}
	}
	return strings.Join(l, " ")
}