package cnm import ( "bytes" "regexp" "strconv" "strings" ) // ParseSimpleText parses raw as simple text (collapses whitespace and resolves // escape sequences). func ParseSimpleText(raw string) string { return Unescape(CollapseWhitespace(raw)) } // CollapseWhitespace collapses sequences of non-escaped whitespace in raw CNM // simple text into single spaces. func CollapseWhitespace(raw string) string { s := strings.Join(strings.FieldsFunc(raw, func(r rune) bool { switch r { case '\t', '\n', '\f', '\r': return true } return false }), " ") var buf bytes.Buffer first := true escape := false space := false for _, r := range s { switch r { case '\\': if escape { buf.WriteString("\\\\") escape = false } else { escape = true } if space && !first { buf.WriteByte(' ') } space = false first = false case ' ': if escape { buf.WriteString("\\ ") escape = false } else { space = true } default: if escape { buf.WriteByte('\\') } if space && !first { buf.WriteByte(' ') } buf.WriteRune(r) escape = false space = false first = false } } if escape { buf.WriteByte('\\') } return buf.String() } var escapeRe = regexp.MustCompile(`[\t\n\f\r\\\x00]|^ | $| `) // Escape escapes whitespace, backslash and U+0000 within s. // // Only leading, trailing or multiple consecutive spaces are escaped. func Escape(s string) string { return escapeRe.ReplaceAllStringFunc(s, func(match string) string { switch match { case "\t": return `\t` case "\n": return `\n` case "\f": return `\f` case "\r": return `\r` case "\\": return `\\` case "\x00": return `\x00` case " ": return `\ ` case " ": return ` \ ` } return match // this shouldn't happen }) } var escapeSpaceRe = regexp.MustCompile(`[\t\n]|^ | $| `) // EscapeSpace works like Escape, except it only escapes spaces, tabs and line // feeds. func EscapeSpace(s string) string { return escapeSpaceRe.ReplaceAllStringFunc(s, func(match string) string { switch match { case "\t": return `\t` case "\n": return `\n` case " ": return `\ ` case " ": return ` \ ` } return match // this shouldn't happen }) } var escapeAllRe = regexp.MustCompile(`[\t\n\f\r\\\x00 ]`) // EscapeAll escapes all whitespace, backslash and and U+0000 within s. // // Unlike Escape, all spaces are escaped, not just ones that would be collapsed // into one. func EscapeAll(s string) string { return escapeAllRe.ReplaceAllStringFunc(s, func(match string) string { switch match { case "\t": return `\t` case "\n": return `\n` case "\f": return `\f` case "\r": return `\r` case "\\": return `\\` case "\x00": return `\x00` case " ": return `\ ` } return match // this shouldn't happen }) } var escapeNonspaceRe = regexp.MustCompile(`[\f\r\\\x00]`) // EscapeNonspace works like Escape, except it does not escape spaces, tabs and // line feeds. func EscapeNonspace(s string) string { return escapeNonspaceRe.ReplaceAllStringFunc(s, func(match string) string { switch match { case "\f": return `\f` case "\r": return `\r` case "\\": return `\\` case "\x00": return `\x00` } return match // this shouldn't happen }) } var unescapeRe = regexp.MustCompile(`\\(?:[btnvfr \\]|x[0-9a-fA-F]{2}|u[0-9a-fA-F]{4}|U[0-9a-fA-F]{8})`) // Unescape resolves escape sequences in simple text. func Unescape(s string) string { return unescapeRe.ReplaceAllStringFunc(s, func(match string) string { switch match[1] { case 'b': return "\b" case 't': return "\t" case 'n': return "\n" case 'v': return "\v" case 'f': return "\f" case 'r': return "\r" case ' ': return " " case '\\': return "\\" case 'x': n, _ := strconv.ParseUint(match[2:], 16, 8) return string(rune(n)) case 'u': n, _ := strconv.ParseUint(match[2:], 16, 16) return string(rune(n)) case 'U': n, _ := strconv.ParseUint(match[2:], 16, 32) return string(rune(n)) } return match // this shouldn't happen }) } var splitRe = regexp.MustCompile(`((?:[^\t\n\f\r\\ ]|\\.?)+|^)`) // SplitUnescape splits the string s by whitespace, then unescapes simple text // escape sequences. func SplitUnescape(s string) []string { ss := splitRe.FindAllString(s, -1) for i := range ss { ss[i] = Unescape(ss[i]) } return ss } // JoinEscape escapes each argument using simple text escape sequences and then // joins them with spaces. func JoinEscape(ss []string) string { var l []string for _, s := range ss { if s != "" { l = append(l, Escape(s)) } } return strings.Join(l, " ") }