diff options
-rw-r--r-- | COPYING | 3 | ||||
-rw-r--r-- | cnmfmt/cnmfmt.go | 2 | ||||
-rw-r--r-- | cnmfmt/cnmfmt_test.go | 15 | ||||
-rw-r--r-- | content.go | 2 | ||||
-rw-r--r-- | document.go | 4 | ||||
-rw-r--r-- | go.mod | 3 | ||||
-rw-r--r-- | parse_test.go | 6 | ||||
-rw-r--r-- | simpletext.go | 38 | ||||
-rw-r--r-- | simpletext_test.go | 179 | ||||
-rw-r--r-- | write_test.go | 11 |
10 files changed, 171 insertions, 92 deletions
@@ -0,0 +1,3 @@ +This software is released into the public domain. +It is provided "as is", without warranties or conditions of any kind. +Anyone is free to use, modify, redistribute and do anything with this software. diff --git a/cnmfmt/cnmfmt.go b/cnmfmt/cnmfmt.go index 1e5b6ee..3e38978 100644 --- a/cnmfmt/cnmfmt.go +++ b/cnmfmt/cnmfmt.go @@ -195,7 +195,7 @@ func (t Text) WriteIndent(w io.Writer, n int) error { if span.Text != "" { pad = " " } - line = append(line, "@@", cnm.Escape(span.Format.Link), pad) + line = append(line, "@@", cnm.EscapeAll(span.Format.Link), pad) } } } diff --git a/cnmfmt/cnmfmt_test.go b/cnmfmt/cnmfmt_test.go index 5d50b18..2ad92cb 100644 --- a/cnmfmt/cnmfmt_test.go +++ b/cnmfmt/cnmfmt_test.go @@ -127,6 +127,9 @@ var parseTests = map[string]Text{ "@@foo ": Text{[]Span{ Span{Format{Link: "foo"}, ""}, }}, + "@@foo\\ bar baz quux@@": Text{[]Span{ + Span{Format{Link: "foo bar"}, "baz quux"}, + }}, "@@foo\\": Text{[]Span{ Span{Format{Link: "foo\\"}, ""}, }}, @@ -213,9 +216,9 @@ func textEqual(a, b Text) bool { } var escapeTests = map[string]string{ - "\n\r\t\v\x00": "\\n\\r\\t\v\\x00", + "\n\r\t\v\x00": "\\n\\r\\t\v\\x00", "@@!!##\"\"//\"\"__``**%%^^&&++==\x01\x01\\": "\\@\\@!!##\\\"\\\"//\\\"\\\"\\_\\_\\`\\`\\*\\*%%^^&&++==\x01\x01\\\\", - `foo\@\@bar`: `foo\\\@\\\@bar`, + `foo\@\@bar`: `foo\\\@\\\@bar`, } func TestEscape(t *testing.T) { @@ -269,6 +272,10 @@ var parseTextTests = map[string]TextFmtContents{ Text{[]Span{Span{Format{Emphasized: true}, "foo"}}}, Text{[]Span{Span{Format{}, "bar"}}}, }}, + + "@@foo\\ bar baz quux@@": TextFmtContents{[]Text{ + Text{[]Span{Span{Format{Link: "foo bar"}, "baz quux"}}}, + }}, } func TestParseTextFmt(t *testing.T) { @@ -335,6 +342,10 @@ var writeTests = map[string]TextFmtContents{ }}, }}, + "@@foo\\ bar baz quux\n": TextFmtContents{[]Text{ + Text{[]Span{Span{Format{Link: "foo bar"}, "baz quux"}}}, + }}, + "foo**bar``baz**quux\n\n" + "\\ \"\"qwe\\ \"\"__\\ asd \\ __``zxc``**\\ \n\n" + 
"__@@http://example.com/__/ exa__mple@@ @@href text@@__ test\n": TextFmtContents{[]Text{ @@ -589,7 +589,7 @@ func (e *EmbedBlock) WriteIndent(w io.Writer, n int) error { } else { s += Escape(e.Type) } - s += " " + Escape(e.URL) + s += " " + EscapeAll(e.URL) if err := WriteIndent(w, s, n); err != nil { return err } diff --git a/document.go b/document.go index 65eac3e..236782b 100644 --- a/document.go +++ b/document.go @@ -197,7 +197,7 @@ type Link struct { // WriteIndent writes the link URL, name and description indented by n tabs. func (link Link) WriteIndent(w io.Writer, n int) error { - s := Escape(link.URL) + s := EscapeAll(link.URL) if link.Name != "" { s += " " + Escape(link.Name) } @@ -226,7 +226,7 @@ type Site struct { // WriteIndent writes the sitemap indented by n tabs. func (site Site) WriteIndent(w io.Writer, n int) error { - s := Escape(site.Path) + s := EscapeAll(site.Path) if site.Name != "" { s += " " + Escape(site.Name) } @@ -0,0 +1,3 @@ +module contnet.org/lib/cnm-go + +go 1.12 diff --git a/parse_test.go b/parse_test.go index e70b833..1c7dad9 100644 --- a/parse_test.go +++ b/parse_test.go @@ -269,7 +269,7 @@ var parseTests = map[string]*Document{ "title\n\tfoo bar\n" + "links\n\tqwe asd\n\tzxc 123\n" + - "site\n\tfoo\n\t\tbar\n\t\tbaz/quux\n\t\t\t123\n" + + "site\n\tfoo Foo\n\t\tbar\n\t\tbaz/qu\\ ux Test\n\t\t\t123\n" + "title\n\tbaz\n" + "links\n\tfoo\n" + "site\n\ttest": &Document{ @@ -283,10 +283,12 @@ var parseTests = map[string]*Document{ Children: []Site{ Site{ Path: "foo", + Name: "Foo", Children: []Site{ Site{Path: "bar"}, Site{ - Path: "baz/quux", + Path: "baz/qu ux", + Name: "Test", Children: []Site{ Site{Path: "123"}, }, diff --git a/simpletext.go b/simpletext.go index 78e089e..00d82ec 100644 --- a/simpletext.go +++ b/simpletext.go @@ -70,7 +70,9 @@ func CollapseWhitespace(raw string) string { var escapeRe = regexp.MustCompile(`[\t\n\f\r\\\x00]|^ | $| `) -// Escape escapes whitespace, backslash and and U+0000 within s. 
+// Escape escapes whitespace, backslash and U+0000 within s. +// +// Only leading, trailing or multiple consecutive spaces are escaped. func Escape(s string) string { return escapeRe.ReplaceAllStringFunc(s, func(match string) string { switch match { @@ -115,6 +117,34 @@ func EscapeSpace(s string) string { }) } +var escapeAllRe = regexp.MustCompile(`[\t\n\f\r\\\x00 ]`) + +// EscapeAll escapes all whitespace, backslash and U+0000 within s. +// +// Unlike Escape, all spaces are escaped, not just ones that would be collapsed +// into one. +func EscapeAll(s string) string { + return escapeAllRe.ReplaceAllStringFunc(s, func(match string) string { + switch match { + case "\t": + return `\t` + case "\n": + return `\n` + case "\f": + return `\f` + case "\r": + return `\r` + case "\\": + return `\\` + case "\x00": + return `\x00` + case " ": + return `\ ` + } + return match // this shouldn't happen + }) +} + var escapeNonspaceRe = regexp.MustCompile(`[\f\r\\\x00]`) // EscapeNonspace works like Escape, except it does not escape spaces, tabs and @@ -159,13 +189,13 @@ func Unescape(s string) string { return "\\" case 'x': n, _ := strconv.ParseUint(match[2:], 16, 8) - return string(n) + return string(rune(n)) case 'u': n, _ := strconv.ParseUint(match[2:], 16, 16) - return string(n) + return string(rune(n)) case 'U': n, _ := strconv.ParseUint(match[2:], 16, 32) - return string(n) + return string(rune(n)) } return match // this shouldn't happen }) diff --git a/simpletext_test.go b/simpletext_test.go index 8fdf754..d75a741 100644 --- a/simpletext_test.go +++ b/simpletext_test.go @@ -3,21 +3,21 @@ package cnm import "testing" var simpleEscapes = map[string]string{ - "": ``, - "ContNet": `ContNet`, - "\t": `\t`, - "\n": `\n`, - "\f": `\f`, - "\r": `\r`, - " ": `\ `, - "\\": `\\`, - "\x00": `\x00`, - " ": `\ \ \ \ `, - " ": `\ \ \ \ `, - " ": `\ \ \ `, - " ": `\ \ \ `, - " ": `\ \ `, - " ": `\ \ `, + "": ``, + "ContNet": `ContNet`, + "\t": `\t`, + "\n": `\n`, + "\f": `\f`, + 
"\r": `\r`, + " ": `\ `, + "\\": `\\`, + "\x00": `\x00`, + " ": `\ \ \ \ `, + " ": `\ \ \ \ `, + " ": `\ \ \ `, + " ": `\ \ \ `, + " ": `\ \ `, + " ": `\ \ `, "\b\v\\\x00\xff\u00ff\\xff": "\b\v\\\\\\x00\xff\u00ff\\\\xff", } @@ -33,21 +33,21 @@ func TestEscape(t *testing.T) { } var nonspaceEscapes = map[string]string{ - "": ``, - "ContNet": `ContNet`, - "\t": "\t", - "\n": "\n", - "\f": `\f`, - "\r": `\r`, - " ": ` `, - "\\": `\\`, - "\x00": `\x00`, - " ": ` `, - " ": ` `, - " ": ` `, - " ": ` `, - " ": ` `, - " ": ` `, + "": ``, + "ContNet": `ContNet`, + "\t": "\t", + "\n": "\n", + "\f": `\f`, + "\r": `\r`, + " ": ` `, + "\\": `\\`, + "\x00": `\x00`, + " ": ` `, + " ": ` `, + " ": ` `, + " ": ` `, + " ": ` `, + " ": ` `, "\b\v\\\x00\xff\u00ff\\xff": "\b\v\\\\\\x00\xff\u00ff\\\\xff", } @@ -63,21 +63,21 @@ func TestEscapeNonspace(t *testing.T) { } var spaceEscapes = map[string]string{ - "": ``, - "ContNet": `ContNet`, - "\t": `\t`, - "\n": `\n`, - "\f": "\f", - "\r": "\r", - " ": `\ `, - "\\": `\`, - "\x00": "\x00", - " ": `\ \ \ \ `, - " ": `\ \ \ \ `, - " ": `\ \ \ `, - " ": `\ \ \ `, - " ": `\ \ `, - " ": `\ \ `, + "": ``, + "ContNet": `ContNet`, + "\t": `\t`, + "\n": `\n`, + "\f": "\f", + "\r": "\r", + " ": `\ `, + "\\": `\`, + "\x00": "\x00", + " ": `\ \ \ \ `, + " ": `\ \ \ \ `, + " ": `\ \ \ `, + " ": `\ \ \ `, + " ": `\ \ `, + " ": `\ \ `, "\b\v\\\x00\xff\u00ff\\xff": "\b\v\\\x00\xff\u00ff\\xff", } @@ -92,37 +92,66 @@ func TestEscapeSpace(t *testing.T) { } } +var allEscapes = map[string]string{ + "": ``, + "ContNet": `ContNet`, + "\t": `\t`, + "\n": `\n`, + "\f": `\f`, + "\r": `\r`, + " ": `\ `, + "\\": `\\`, + "\x00": `\x00`, + " ": `\ \ \ \ \ \ \ `, + " ": `\ \ \ \ \ \ `, + " ": `\ \ \ \ `, + " ": `\ \ \ `, + " ": `\ \ `, + "\b\v\\\x00\xff\u00ff\\xff": "\b\v\\\\\\x00\xff\u00ff\\\\xff", +} + +func TestEscapeAll(t *testing.T) { + for k, v := range allEscapes { + t.Run(k, func(t *testing.T) { + e := EscapeAll(k) + if e != v { + t.Errorf("EscapeAll(%q) -> %q, 
expected %q", k, e, v) + } + }) + } +} + var simpleUnescapes = map[string]string{ - ``: "", - `ContNet`: "ContNet", - `\b`: "\b", - `\t`: "\t", - `\n`: "\n", - `\v`: "\v", - `\f`: "\f", - `\r`: "\r", - `\ `: " ", - `\\`: "\\", - `\`: "\\", - `\x00`: "\x00", - `a\nb\ c\rd\be\\f`: "a\nb c\rd\be\\f", - `\n\n\n`: "\n\n\n", - `\x00\xff\n\x123`: "\x00\u00ff\n\x123", - " \b\\b\t\n\v\f\r\\x00\x00\\\\xff": " \b\b\t\n\v\f\r\x00\x00\\xff", - `\xAA\xAa\xaA\xaa`: "\u00aa\u00aa\u00aa\u00aa", - `\x00\xfg`: "\x00\\xfg", - `\\\\\\`: "\\\\\\", - "\b5Ὂg̀9!\\n℃ᾭG": "\b5Ὂg̀9!\n℃ᾭG", - "\xff\\x00\xee\xaa\xee": "\xff\x00\xee\xaa\xee", - "\\x00\x10\\ \x30\x40": "\x00\x10\x20\x30\x40", - "\x10\x50\x90\xe0": "\x10\x50\x90\xe0", - `Hello,\ 世界`: "Hello, 世界", - "\xed\x9f\xbf": "\xed\x9f\xbf", - "\xee\x80\x80": "\xee\x80\x80", - "\xef\xbf\xbd": "\xef\xbf\xbd", - "\x80\x80\x80\x80": "\x80\x80\x80\x80", - `\ \ \ `: " ", - `\uffff\u0000\u0123\ufedc\ufffe`: "\uffff\u0000\u0123\ufedc\ufffe", + ``: "", + `ContNet`: "ContNet", + `\b`: "\b", + `\t`: "\t", + `\n`: "\n", + `\v`: "\v", + `\f`: "\f", + `\r`: "\r", + `\ `: " ", + `\\`: "\\", + `\`: "\\", + `\x00`: "\x00", + `a\nb\ c\rd\be\\f`: "a\nb c\rd\be\\f", + `\n\n\n`: "\n\n\n", + `\x00\xff\n\x123`: "\x00\u00ff\n\x123", + " \b\\b\t\n\v\f\r\\x00\x00\\\\xff": " \b\b\t\n\v\f\r\x00\x00\\xff", + `\xAA\xAa\xaA\xaa`: "\u00aa\u00aa\u00aa\u00aa", + `\x00\xfg`: "\x00\\xfg", + `\\\\\\`: "\\\\\\", + "\b5Ὂg̀9!\\n℃ᾭG": "\b5Ὂg̀9!\n℃ᾭG", + "\xff\\x00\xee\xaa\xee": "\xff\x00\xee\xaa\xee", + "\\x00\x10\\ \x30\x40": "\x00\x10\x20\x30\x40", + "\x10\x50\x90\xe0": "\x10\x50\x90\xe0", + `Hello,\ 世界`: "Hello, 世界", + "\xed\x9f\xbf": "\xed\x9f\xbf", + "\xee\x80\x80": "\xee\x80\x80", + "\xef\xbf\xbd": "\xef\xbf\xbd", + "\x80\x80\x80\x80": "\x80\x80\x80\x80", + `\ \ \ `: " ", + `\uffff\u0000\u0123\ufedc\ufffe`: "\uffff\u0000\u0123\ufedc\ufffe", `\Uffff0000\U0003fedc\U0010ffff\U00110000`: "\ufffd\U0003fedc\U0010ffff\ufffd", `\x0x\u012x\U0123456x`: "\\x0x\\u012x\\U0123456x", 
`\U0123456`: "\\U0123456", diff --git a/write_test.go b/write_test.go index 18809b1..8cca449 100644 --- a/write_test.go +++ b/write_test.go @@ -13,22 +13,23 @@ var writeTests = map[string]*Document{ }, "title\n\tfoo bar baz\n" + - "links\n\tqwe asd\n\tzxc 123\n\tfoo\n" + - "site\n\tfoo\n\t\tbar\n\t\tbaz/quux\n\t\t\t123\n\ttest\n": &Document{ + "links\n\tqwe\\ asd zxc 123\n\tfoo\n" + + "site\n\tfoo Foo\n\t\tbar\n\t\tbaz/qu\\ ux Test\n\t\t\t123\n\ttest\n": &Document{ Title: "foo bar baz", Links: []Link{ - Link{"qwe", "asd", ""}, - Link{"zxc", "123", ""}, + Link{"qwe asd", "zxc 123", ""}, Link{"foo", "", ""}, }, Site: Site{ Children: []Site{ Site{ Path: "foo", + Name: "Foo", Children: []Site{ Site{Path: "bar"}, Site{ - Path: "baz/quux", + Path: "baz/qu ux", + Name: "Test", Children: []Site{ Site{Path: "123"}, }, |