summaryrefslogtreecommitdiffstats
path: root/simpletext.go
diff options
context:
space:
mode:
authorclsr <clsr@clsr.net>2017-08-18 13:45:49 +0200
committerclsr <clsr@clsr.net>2017-08-18 13:45:49 +0200
commit26248678aafc2f8e277d4bdafc116f2b349b02c5 (patch)
tree15f82488edb8c05aae756443284731875f36737c /simpletext.go
downloadcnm-go-26248678aafc2f8e277d4bdafc116f2b349b02c5.tar.gz
cnm-go-26248678aafc2f8e277d4bdafc116f2b349b02c5.zip
Initial commitv0.1.0
Diffstat (limited to 'simpletext.go')
-rw-r--r--simpletext.go196
1 files changed, 196 insertions, 0 deletions
diff --git a/simpletext.go b/simpletext.go
new file mode 100644
index 0000000..78e089e
--- /dev/null
+++ b/simpletext.go
@@ -0,0 +1,196 @@
+package cnm
+
+import (
+ "bytes"
+ "regexp"
+ "strconv"
+ "strings"
+)
+
+// ParseSimpleText parses raw as simple text (collapses whitespace and resolves
+// escape sequences).
+func ParseSimpleText(raw string) string {
+ return Unescape(CollapseWhitespace(raw))
+}
+
// CollapseWhitespace rewrites raw CNM simple text so that every run of
// unescaped whitespace becomes a single space. Unescaped whitespace at the
// start and at the end of the text is dropped. Escape sequences are copied
// through unchanged (they are not resolved here).
func CollapseWhitespace(raw string) string {
	// Tabs, line feeds, form feeds and carriage returns behave like spaces, so
	// they are normalised to single spaces before the scan below.
	isBreak := func(r rune) bool {
		return r == '\t' || r == '\n' || r == '\f' || r == '\r'
	}
	flat := strings.Join(strings.FieldsFunc(raw, isBreak), " ")

	var (
		out          bytes.Buffer
		started      bool // a non-space rune has already been consumed
		pendingSlash bool // an unpaired backslash is waiting for the next rune
		pendingSpace bool // unescaped space(s) seen since the last output
	)
	for _, r := range flat {
		switch {
		case r == ' ' && pendingSlash:
			// Escaped space: keep it verbatim.
			out.WriteString("\\ ")
			pendingSlash = false
		case r == ' ':
			// Defer the space; it is only written if more text follows.
			pendingSpace = true
		case r == '\\':
			if pendingSlash {
				out.WriteString("\\\\")
			}
			pendingSlash = !pendingSlash
			if pendingSpace && started {
				out.WriteByte(' ')
			}
			pendingSpace = false
			started = true
		default:
			if pendingSlash {
				out.WriteByte('\\')
				pendingSlash = false
			}
			if pendingSpace && started {
				out.WriteByte(' ')
			}
			out.WriteRune(r)
			pendingSpace = false
			started = true
		}
	}
	// A lone backslash at the very end is preserved as is.
	if pendingSlash {
		out.WriteByte('\\')
	}
	return out.String()
}
+
// escapeRe matches everything Escape has to rewrite: tab, line feed, form
// feed, carriage return, backslash, U+0000, a space at the very start or very
// end of the string, and any two adjacent spaces. The two-space alternative is
// spelled " {2}" so that whitespace-collapsing tools cannot silently turn it
// into a single space.
var escapeRe = regexp.MustCompile(`[\t\n\f\r\\\x00]|^ | $| {2}`)

// Escape escapes whitespace, backslash and U+0000 within s.
//
// A single space between two other characters is left untouched. A leading or
// trailing space, and the second space of every adjacent pair, is written as
// an escaped space so that the spacing survives whitespace collapsing.
func Escape(s string) string {
	return escapeRe.ReplaceAllStringFunc(s, func(match string) string {
		switch match {
		case "\t":
			return `\t`
		case "\n":
			return `\n`
		case "\f":
			return `\f`
		case "\r":
			return `\r`
		case "\\":
			return `\\`
		case "\x00":
			return `\x00`
		case "\x20":
			// Leading or trailing space.
			return `\ `
		case "\x20\x20":
			// Two adjacent spaces: keep the first, escape the second.
			return ` \ `
		}
		return match // unreachable: every alternative of escapeRe is handled above
	})
}
+
// escapeSpaceRe matches what EscapeSpace rewrites: tab, line feed, a space at
// the very start or very end of the string, and any two adjacent spaces. The
// two-space alternative is spelled " {2}" so that whitespace-collapsing tools
// cannot silently turn it into a single space.
var escapeSpaceRe = regexp.MustCompile(`[\t\n]|^ | $| {2}`)

// EscapeSpace works like Escape, except it only escapes spaces, tabs and line
// feeds. Backslashes, form feeds, carriage returns and U+0000 are passed
// through unchanged.
//
// A single space between two other characters is left untouched. A leading or
// trailing space, and the second space of every adjacent pair, is written as
// an escaped space.
func EscapeSpace(s string) string {
	return escapeSpaceRe.ReplaceAllStringFunc(s, func(match string) string {
		switch match {
		case "\t":
			return `\t`
		case "\n":
			return `\n`
		case "\x20":
			// Leading or trailing space.
			return `\ `
		case "\x20\x20":
			// Two adjacent spaces: keep the first, escape the second.
			return ` \ `
		}
		return match // unreachable: every alternative of escapeSpaceRe is handled above
	})
}
+
// escapeNonspaceRe matches form feed, carriage return, backslash and U+0000.
var escapeNonspaceRe = regexp.MustCompile(`[\f\r\\\x00]`)

// EscapeNonspace is the counterpart of EscapeSpace: it escapes form feeds,
// carriage returns, backslashes and U+0000, and leaves spaces, tabs and line
// feeds exactly as they are.
func EscapeNonspace(s string) string {
	replacements := map[string]string{
		"\f":   `\f`,
		"\r":   `\r`,
		"\\":   `\\`,
		"\x00": `\x00`,
	}
	return escapeNonspaceRe.ReplaceAllStringFunc(s, func(found string) string {
		if escaped, ok := replacements[found]; ok {
			return escaped
		}
		return found // not expected: the pattern matches only the keys above
	})
}
+
// unescapeRe matches one complete escape sequence: a backslash followed by a
// single-character escape (b, t, n, v, f, r, space, backslash) or by x, u or U
// with exactly 2, 4 or 8 hexadecimal digits respectively.
var unescapeRe = regexp.MustCompile(`\\(?:[btnvfr \\]|x[0-9a-fA-F]{2}|u[0-9a-fA-F]{4}|U[0-9a-fA-F]{8})`)

// Unescape resolves escape sequences in simple text.
//
// Backslash sequences that do not form a recognised escape are left untouched.
// The hexadecimal forms (\xHH, \uHHHH, \UHHHHHHHH) denote a Unicode code point
// and are replaced by its UTF-8 encoding; values that are not valid code
// points (surrogates, anything above U+10FFFF) become U+FFFD.
func Unescape(s string) string {
	return unescapeRe.ReplaceAllStringFunc(s, func(match string) string {
		switch match[1] {
		case 'b':
			return "\b"
		case 't':
			return "\t"
		case 'n':
			return "\n"
		case 'v':
			return "\v"
		case 'f':
			return "\f"
		case 'r':
			return "\r"
		case ' ':
			return " "
		case '\\':
			return "\\"
		case 'x', 'u', 'U':
			// The pattern guarantees 2, 4 or 8 hex digits, so the value always
			// fits in 32 bits and parsing is not expected to fail.
			n, err := strconv.ParseUint(match[2:], 16, 32)
			if err != nil {
				return match
			}
			// Convert through rune explicitly: string(integer) is rejected by
			// go vet. Out-of-range values wrap to invalid runes and therefore
			// still encode as U+FFFD.
			return string(rune(n))
		}
		return match // unreachable: every alternative of unescapeRe is handled above
	})
}
+
+var splitRe = regexp.MustCompile(`((?:[^\t\n\f\r\\ ]|\\.?)+|^)`)
+
+// SplitUnescape splits the string s by whitespace, then unescapes simple text
+// escape sequences.
+func SplitUnescape(s string) []string {
+ ss := splitRe.FindAllString(s, -1)
+ for i := range ss {
+ ss[i] = Unescape(ss[i])
+ }
+ return ss
+}
+
+// JoinEscape escapes each argument using simple text escape sequences and then
+// joins them with spaces.
+func JoinEscape(ss []string) string {
+ var l []string
+ for _, s := range ss {
+ if s != "" {
+ l = append(l, Escape(s))
+ }
+ }
+ return strings.Join(l, " ")
+}