From 26248678aafc2f8e277d4bdafc116f2b349b02c5 Mon Sep 17 00:00:00 2001 From: clsr Date: Fri, 18 Aug 2017 13:45:49 +0200 Subject: Initial commit --- cnmfmt/cnmfmt.go | 525 +++++++++++++++++++++++++++++++++++++ cnmfmt/cnmfmt_test.go | 457 ++++++++++++++++++++++++++++++++ content.go | 610 +++++++++++++++++++++++++++++++++++++++++++ document.go | 278 ++++++++++++++++++++ parse.go | 189 ++++++++++++++ parse_test.go | 701 ++++++++++++++++++++++++++++++++++++++++++++++++++ simpletext.go | 196 ++++++++++++++ simpletext_test.go | 180 +++++++++++++ token.go | 58 +++++ write_test.go | 218 ++++++++++++++++ 10 files changed, 3412 insertions(+) create mode 100644 cnmfmt/cnmfmt.go create mode 100644 cnmfmt/cnmfmt_test.go create mode 100644 content.go create mode 100644 document.go create mode 100644 parse.go create mode 100644 parse_test.go create mode 100644 simpletext.go create mode 100644 simpletext_test.go create mode 100644 token.go create mode 100644 write_test.go diff --git a/cnmfmt/cnmfmt.go b/cnmfmt/cnmfmt.go new file mode 100644 index 0000000..cb8dc64 --- /dev/null +++ b/cnmfmt/cnmfmt.go @@ -0,0 +1,525 @@ +// Package cnmfmt provides parsing and composition for CNMfmt formatting. +package cnmfmt // import "contnet.org/lib/cnm-go/cnmfmt" + +import ( + "bytes" + "io" + "strings" + + "contnet.org/lib/cnm-go" +) + +func init() { + cnm.RegisterTextContentParser("fmt", parseTextFmt) +} + +// Text represents a paragraph of CNMfmt text. +type Text struct { + // Spans are spans of formatted text. + Spans []Span +} + +// ParseParagraph parses a single CNMfmt text paragraph s. 
+func ParseParagraph(s string) Text { + s = cnm.CollapseWhitespace(s) + + t := Text{} + var buf bytes.Buffer + format := Format{} + last := rune(-1) + url := false + + for _, r := range s { + if url && format.Link == "" { // need URL for link + if handleURL(r, &last, &format, &buf) { + continue + } + } + + switch r { + case '*', '/', '_', '`', '@': + handleTag(r, &last, &t, &format, &buf, &url) + + case '\\': + if last == '\\' { + buf.WriteString("\\\\") + last = -1 + } else { + if last >= 0 { + buf.WriteRune(last) + } + last = '\\' + } + + default: + if last >= 0 { + buf.WriteRune(last) + } + buf.WriteRune(r) + last = -1 + } + } + + if url && format.Link == "" { + if last >= 0 { + buf.WriteRune(last) + } + format.Link = Unescape(buf.String()) + buf.Reset() + } else if last >= 0 { + buf.WriteRune(last) + } + last = -1 + handleTag(-1, &last, &t, &format, &buf, &url) + + t.trimUnescape() + + return t +} + +func (t *Text) trimUnescape() { + var spans []Span + + for _, span := range t.Spans { + if span.Text != "" || span.Format.Link != "" { + spans = append(spans, span) + } + } + t.Spans, spans = spans, nil + + for i := len(t.Spans) - 1; i >= 0; i-- { + span := t.Spans[i] + if span.Text != "" || span.Format.Link != "" { + spans = append(spans, span) + } + } + for i := 0; i < len(spans)/2; i++ { + spans[i], spans[len(spans)-1-i] = spans[len(spans)-1-i], spans[i] + } + t.Spans = spans + + for i := range t.Spans { + t.Spans[i].Text = Unescape(t.Spans[i].Text) + } +} + +func (t *Text) appendSpan(format Format, txt string) { + if txt != "" || format.Link != "" { + t.Spans = append(t.Spans, Span{format, txt}) + } +} + +func handleURL(r rune, last *rune, format *Format, buf *bytes.Buffer) bool { + if r == '@' && *last == '@' { // end without text + format.Link = Unescape(buf.String()) + buf.Reset() + return false + } else if *last == '\\' { + buf.WriteByte('\\') + buf.WriteRune(r) + *last = -1 + } else if r == '\\' || r == '@' { + *last = r + } else if r != ' ' { // url + 
buf.WriteRune(r) + } else if buf.Len() > 0 { // space, then text + format.Link = Unescape(buf.String()) + buf.Reset() + } // else: prefix space + return true +} + +func handleTag(r rune, last *rune, txt *Text, format *Format, buf *bytes.Buffer, url *bool) { + if *last == '\\' { + buf.WriteRune(r) + *last = -1 + } else if *last == r { + txt.appendSpan(*format, buf.String()) + buf.Reset() + switch r { + case '*': + format.Bold = !format.Bold + case '/': + format.Italic = !format.Italic + case '_': + format.Underline = !format.Underline + case '`': + format.Monospace = !format.Monospace + case '@': + format.Link = "" + *url = !*url + } + *last = -1 + } else { + switch *last { + case '*', '/', '_', '`', '@': + buf.WriteRune(*last) + } + *last = r + } +} + +// WriteIndent writes the formatted text indented by n tabs. +func (t Text) WriteIndent(w io.Writer, n int) error { + var state [5]byte // bold, italic, underline, monospace, link + si := 0 + format := Format{} + spans := EscapeSpans(t.Spans) + var line []string + for _, span := range spans { + order := tagOrder(state[:si], format, span.Format) + for _, f := range order { + switch f { + case '*': + format.Bold = !format.Bold + line = append(line, "**") + case '/': + format.Italic = !format.Italic + line = append(line, "//") + case '_': + format.Underline = !format.Underline + line = append(line, "__") + case '`': + format.Monospace = !format.Monospace + line = append(line, "``") + case '@': + if format.Link != "" { + line = append(line, "@@") + } + if span.Format.Link != "" { + pad := "" + if span.Text != "" { + pad = " " + } + line = append(line, "@@", cnm.Escape(span.Format.Link), pad) + } + } + } + line = append(line, span.Text) + si = cleanupTags(state[:], order, span.Format) + format = span.Format + } + return writeIndent(w, strings.Join(line, ""), n) +} + +func tagOrder(state []byte, old, new Format) []byte { + ldiff := "" + if old.Link != new.Link { + ldiff = "1" + } + diff := Format{ + Bold: old.Bold != 
new.Bold, + Italic: old.Italic != new.Italic, + Underline: old.Underline != new.Underline, + Monospace: old.Monospace != new.Monospace, + Link: ldiff, + } + + var order [5]byte + oi := 0 + for i := len(state) - 1; i >= 0; i-- { + switch state[i] { + case '*': + if diff.Bold { + order[oi] = '*' + oi++ + diff.Bold = false + } + case '/': + if diff.Italic { + order[oi] = '/' + oi++ + diff.Italic = false + } + case '_': + if diff.Underline { + order[oi] = '_' + oi++ + diff.Underline = false + } + case '`': + if diff.Monospace { + order[oi] = '`' + oi++ + diff.Monospace = false + } + case '@': + if diff.Link != "" { + order[oi] = '@' + oi++ + diff.Link = "" + } + } + } + + if diff.Bold { + order[oi] = '*' + oi++ + } + if diff.Italic { + order[oi] = '/' + oi++ + } + if diff.Underline { + order[oi] = '_' + oi++ + } + if diff.Monospace { + order[oi] = '`' + oi++ + } + if diff.Link != "" { + order[oi] = '@' + oi++ + } + + return order[:oi] +} + +func cleanupTags(state []byte, order []byte, format Format) int { + var newState [10]byte + copy(newState[:5], state) + copy(newState[5:], order) + for i := range newState { + switch newState[i] { + case '*': + if !format.Bold { + newState[i] = 0 + } + case '/': + if !format.Italic { + newState[i] = 0 + } + case '_': + if !format.Underline { + newState[i] = 0 + } + case '`': + if !format.Monospace { + newState[i] = 0 + } + case '@': + if format.Link == "" { + newState[i] = 0 + } + } + } + si := 0 + for _, f := range newState { + if f > 0 { + state[si] = f + si++ + } + } + return si +} + +// Span represents a span of text with a format. +type Span struct { + // Format is the format of the text. + Format Format + + // Text is the text content of the span. + Text string +} + +// Format represents a state of CNMfmt formatting. +type Format struct { + // Bold text. + Bold bool + + // Italic text. + Italic bool + + // Underlined text. + Underline bool + + // Monospaced text. + Monospace bool + + // Hyperlink URL (if non-empty). 
+ Link string +} + +// Escape escapes CNMfmt and CNM text special characters. +func Escape(s string) string { + return EscapeFmt(cnm.Escape(s)) +} + +// EscapeSpans escapes CNMfmt and CNM text within spans. +// +// This function will not needlessly escape spaces at the start or end of a +// span if the sibling span contains nonspaces. +func EscapeSpans(spans []Span) []Span { + // XXX: this is an ugly solution + esc := make([]Span, len(spans)) + for i := range spans { + start := false + end := false + span := spans[i] + if i+1 < len(spans) { + s := spans[i+1].Text + if len(s) > 0 && s[0] != ' ' { + span.Text = span.Text + "x" + end = true + } + } + if i > 0 { + s := spans[i-1].Text + if len(s) > 0 && s[len(s)-1] != ' ' { + span.Text = "x" + span.Text + start = true + } + } + span.Text = Escape(span.Text) + if start { + span.Text = span.Text[1:] + } + if end { + span.Text = span.Text[:len(span.Text)-1] + } + esc[i] = span + } + return esc +} + +var escapeReplacer = strings.NewReplacer( + `*`, `\*`, + `/`, `\/`, + `_`, `\_`, + "`", "\\`", + `@`, `\@`, +) + +// EscapeFmt escapes only CNMfmt format toggle characters. +func EscapeFmt(s string) string { + return escapeReplacer.Replace(s) +} + +// Unescape resolves CNM text and CNMfmt escape sequences in s. +func Unescape(s string) string { + return cnm.Unescape(UnescapeFmt(s)) +} + +var unescapeReplacer = strings.NewReplacer( + `\\`, `\\`, + `\*`, `*`, + `\/`, `/`, + `\_`, `_`, + "\\`", "`", + `\@`, `@`, +) + +// UnescapeFmt resolves only CNMfmt escape sequences in s. +func UnescapeFmt(s string) string { + return unescapeReplacer.Replace(s) +} + +// TextFmtContents represents CNM `text fmt` contents. +type TextFmtContents struct { + Paragraphs []Text +} + +// NewTextFmtBlock creates a new `text fmt` block containing provided CNMfmt +// paragraphs. 
+func NewTextFmtBlock(paragraphs []Text) *cnm.TextBlock { + return cnm.NewTextBlock("fmt", TextFmtContents{paragraphs}) +} + +// WriteIndent writes the formatted text contents indented by n tabs. +func (tf TextFmtContents) WriteIndent(w io.Writer, n int) error { + for i, p := range tf.Paragraphs { + if i != 0 { + if err := writeIndent(w, "", 0); err != nil { + return err + } + } + if err := p.WriteIndent(w, n); err != nil { + return err + } + } + return nil +} + +// Parse parses paragraphs of CNMfmt text. +func Parse(paragraphs string) []Text { + var txt []Text + var paragraph []string + + for _, line := range strings.Split(paragraphs, "\n") { + end := false + if line != "" { + if strings.Trim(line, "\n\r\t\f ") == "" { + end = true + } else { + paragraph = append(paragraph, line) + } + } else if len(paragraph) > 0 { + end = true + } + if end { + txt = append(txt, ParseParagraph(strings.Join(paragraph, " "))) + paragraph = nil + } + } + if len(paragraph) > 0 { + txt = append(txt, ParseParagraph(strings.Join(paragraph, " "))) + } + + return txt +} + +func writeIndent(w io.Writer, s string, depth int) error { + const tabs = "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t" + + if s == "" { + _, err := w.Write([]byte{'\n'}) + return err + } + if depth == 0 { + _, err := w.Write([]byte(s + "\n")) + return err + } + + var ind string + if depth <= len(tabs) { + ind = tabs[:depth] + } else { + ind = strings.Repeat("\t", depth) + } + _, err := w.Write([]byte(ind + s + "\n")) + return err + +} + +func parseTextFmt(p *cnm.Parser, block *cnm.TokenBlock) (cnm.TextContents, error) { + txt := TextFmtContents{} + var paragraph []string + var err error + for err == nil { + if !p.Empty() && p.Indent() <= block.Indent() { + break + } + + token := p.RawText() + end := false + if text, ok := token.(*cnm.TokenRawText); ok { + if strings.Trim(text.Text, "\n\r\t\f ") == "" { + end = true + } else { + paragraph = append(paragraph, text.Text) + } + } else if _, ok := token.(*cnm.TokenEmptyLine); 
ok && len(paragraph) > 0 { + end = true + } + if end { + txt.Paragraphs = append(txt.Paragraphs, ParseParagraph(strings.Join(paragraph, " "))) + paragraph = nil + } + err = p.Next() + } + if len(paragraph) > 0 { + txt.Paragraphs = append(txt.Paragraphs, ParseParagraph(strings.Join(paragraph, " "))) + } + return txt, err +} diff --git a/cnmfmt/cnmfmt_test.go b/cnmfmt/cnmfmt_test.go new file mode 100644 index 0000000..89a40a9 --- /dev/null +++ b/cnmfmt/cnmfmt_test.go @@ -0,0 +1,457 @@ +package cnmfmt + +import ( + "bytes" + "io" + "strings" + "testing" + + "contnet.org/lib/cnm-go" +) + +var parseTests = map[string]Text{ + "\\nfoo\nbar\\": Text{[]Span{ + Span{Format{}, "\nfoo bar\\"}, + }}, + "**foo": Text{[]Span{ + Span{Format{Bold: true}, "foo"}, + }}, + "//foo": Text{[]Span{ + Span{Format{Italic: true}, "foo"}, + }}, + "__foo": Text{[]Span{ + Span{Format{Underline: true}, "foo"}, + }}, + "``foo": Text{[]Span{ + Span{Format{Monospace: true}, "foo"}, + }}, + "foo*bar": Text{[]Span{ + Span{Format{}, "foo*bar"}, + }}, + "foo*": Text{[]Span{ + Span{Format{}, "foo*"}, + }}, + "foo**": Text{[]Span{ + Span{Format{}, "foo"}, + }}, + "foo***": Text{[]Span{ + Span{Format{}, "foo"}, + Span{Format{Bold: true}, "*"}, + }}, + "foo****": Text{[]Span{ + Span{Format{}, "foo"}, + }}, + "*foo": Text{[]Span{ + Span{Format{}, "*foo"}, + }}, + "****foo": Text{[]Span{ + Span{Format{}, "foo"}, + }}, + "******foo": Text{[]Span{ + Span{Format{Bold: true}, "foo"}, + }}, + "foo ** bar": Text{[]Span{ + Span{Format{}, "foo "}, + Span{Format{Bold: true}, " bar"}, + }}, + "foo** bar": Text{[]Span{ + Span{Format{}, "foo"}, + Span{Format{Bold: true}, " bar"}, + }}, + "foo **bar": Text{[]Span{ + Span{Format{}, "foo "}, + Span{Format{Bold: true}, "bar"}, + }}, + "foo ** bar ** baz": Text{[]Span{ + Span{Format{}, "foo "}, + Span{Format{Bold: true}, " bar "}, + Span{Format{}, " baz"}, + }}, + "foo ** bar** baz": Text{[]Span{ + Span{Format{}, "foo "}, + Span{Format{Bold: true}, " bar"}, + Span{Format{}, 
" baz"}, + }}, + "**__**foo": Text{[]Span{ + Span{Format{Underline: true}, "foo"}, + }}, + "***": Text{[]Span{ + Span{Format{Bold: true}, "*"}, + }}, + "*\\**": Text{[]Span{ + Span{Format{}, "***"}, + }}, + "\\*": Text{[]Span{ + Span{Format{}, "*"}, + }}, + "\\*\\*": Text{[]Span{ + Span{Format{}, "**"}, + }}, + "\\**": Text{[]Span{ + Span{Format{}, "**"}, + }}, + "*\\*": Text{[]Span{ + Span{Format{}, "**"}, + }}, + "\\": Text{[]Span{ + Span{Format{}, "\\"}, + }}, + "\\\\": Text{[]Span{ + Span{Format{}, "\\"}, + }}, + " ** // `` ": Text{[]Span{ + Span{Format{Bold: true}, " "}, + Span{Format{Bold: true, Italic: true}, " "}, + }}, + "**": Text{[]Span{}}, + "**``__//foo": Text{[]Span{ + Span{Format{Bold: true, Monospace: true, Underline: true, Italic: true}, "foo"}, + }}, + "**foo//bar**baz": Text{[]Span{ + Span{Format{Bold: true}, "foo"}, + Span{Format{Bold: true, Italic: true}, "bar"}, + Span{Format{Italic: true}, "baz"}, + }}, + "@@foo": Text{[]Span{ + Span{Format{Link: "foo"}, ""}, + }}, + "@@foo@@": Text{[]Span{ + Span{Format{Link: "foo"}, ""}, + }}, + "@@foo bar@@": Text{[]Span{ + Span{Format{Link: "foo"}, "bar"}, + }}, + "@@ foo": Text{[]Span{ + Span{Format{Link: "foo"}, ""}, + }}, + "@@foo ": Text{[]Span{ + Span{Format{Link: "foo"}, ""}, + }}, + "@@foo\\": Text{[]Span{ + Span{Format{Link: "foo\\"}, ""}, + }}, + "@@foo \\": Text{[]Span{ + Span{Format{Link: "foo"}, "\\"}, + }}, + "@@foo \\\\": Text{[]Span{ + Span{Format{Link: "foo"}, "\\"}, + }}, + "@@foo@": Text{[]Span{ + Span{Format{Link: "foo@"}, ""}, + }}, + "@@foo\\@@": Text{[]Span{ + Span{Format{Link: "foo@@"}, ""}, + }}, + "@@f\\\\o\\o\\n @": Text{[]Span{ + Span{Format{Link: "f\\o\\o\n"}, "@"}, + }}, + "@@http://example.com foo **bar @@baz**": Text{[]Span{ + Span{Format{Link: "http://example.com"}, "foo "}, + Span{Format{Bold: true, Link: "http://example.com"}, "bar "}, + Span{Format{Bold: true}, "baz"}, + }}, + "//@@http://example.com foo //bar @@": Text{[]Span{ + Span{Format{Italic: true, Link: 
"http://example.com"}, "foo "}, + Span{Format{Link: "http://example.com"}, "bar "}, + }}, + "__\\ asd \\ zxc\\ ": Text{[]Span{ + Span{Format{Underline: true, Monospace: false}, " asd zxc "}, + }}, + "@@/ test/@@": Text{[]Span{ + Span{Format{Link: "/"}, "test/"}, + }}, + "@@/ /test@@": Text{[]Span{ + Span{Format{Link: "/"}, "/test"}, + }}, + "/": Text{[]Span{ + Span{Format{}, "/"}, + }}, + "test/**": Text{[]Span{ + Span{Format{}, "test/"}, + }}, + "//test/": Text{[]Span{ + Span{Format{Italic: true}, "test/"}, + }}, + "/**test": Text{[]Span{ + Span{Format{}, "/"}, + Span{Format{Bold: true}, "test"}, + }}, +} + +func TestParseParagraph(t *testing.T) { + for k, v := range parseTests { + t.Run(k, func(t *testing.T) { + txt := ParseParagraph(k) + if !textEqual(txt, v) { + t.Errorf("ParseParagraph(%q):\nexpected: %#v\n got: %#v", k, v, txt) + } + }) + } +} + +func TestParse(t *testing.T) { + for k, v := range parseTests { + t.Run(k, func(t *testing.T) { + txts := Parse(k) + if len(txts) != 1 || !textEqual(txts[0], v) { + t.Errorf("Parse(%q):\nexpected: %#v\n got: %#v", k, []Text{v}, txts) + } + }) + } +} + +func textEqual(a, b Text) bool { + if len(a.Spans) != len(b.Spans) { + return false + } + for i := range a.Spans { + if a.Spans[i] != b.Spans[i] { + return false + } + } + return true +} + +var escapeTests = map[string]string{ + "\n\r\t\v\x00": "\\n\\r\\t\v\\x00", + "@@!!##__//__``**": "\\@\\@!!##\\_\\_\\/\\/\\_\\_\\`\\`\\*\\*", + `foo\@\@bar`: `foo\\\@\\\@bar`, +} + +func TestEscape(t *testing.T) { + for k, v := range escapeTests { + t.Run(k, func(t *testing.T) { + if e := Escape(k); e != v { + t.Errorf("Escape(%q): expected %q, got %q", k, v, e) + } + }) + } +} + +var parseTextTests = map[string]TextFmtContents{ + "foo ** bar\nbaz\n\n\nquux ** ": TextFmtContents{[]Text{ + Text{[]Span{ + Span{Format{}, "foo "}, + Span{Format{Bold: true}, " bar baz"}, + }}, + Text{[]Span{ + Span{Format{}, "quux "}, + }}, + }}, + + "\n": TextFmtContents{}, + + "foo": 
TextFmtContents{[]Text{ + Text{[]Span{ + Span{Format{}, "foo"}, + }}, + }}, + + "\n\n": TextFmtContents{}, + + "foo\n\t\t\t\t\nbar": TextFmtContents{[]Text{ + Text{[]Span{Span{Format{}, "foo"}}}, + Text{[]Span{Span{Format{}, "bar"}}}, + }}, + + "foo\n\t\t \f\r\t\nbar": TextFmtContents{[]Text{ + Text{[]Span{Span{Format{}, "foo"}}}, + Text{[]Span{Span{Format{}, "bar"}}}, + }}, + + `foo**bar\*\*baz\*\*quux**qweasd`: TextFmtContents{[]Text{Text{[]Span{ + Span{Format{}, "foo"}, + Span{Format{Bold: true}, "bar**baz**quux"}, + Span{Format{}, "qweasd"}, + }}}}, +} + +func TestParseTextFmt(t *testing.T) { + for k, v := range parseTextTests { + t.Run(k, func(t *testing.T) { + parser := cnm.NewParser(strings.NewReader(k)) + err := parser.Next() + if err != nil && err != io.EOF { + t.Fatalf("error parsing %q: %v", k, err) + } + content, err := parseTextFmt(parser, cnm.TopLevel) + if err != nil && err != io.EOF { + t.Fatalf("error parsing %q: %v", k, err) + } + tf, ok := content.(TextFmtContents) + if !ok { + t.Fatalf("%q: expected type %T, got %T", k, v, content) + } + if !paragraphsEqual(v.Paragraphs, tf.Paragraphs) { + t.Fatalf("%q:\nexpected: %#v\n got: %#v", k, v, tf) + } + txts := Parse(k) + if !paragraphsEqual(txts, v.Paragraphs) { + t.Fatalf("%q:\nexpected: %#v\n got: %#v", k, v.Paragraphs, txts) + } + }) + } +} + +func paragraphsEqual(a, b []Text) bool { + if len(a) != len(b) { + return false + } + for i := range a { + if !textEqual(a[i], b[i]) { + return false + } + } + return true +} + +var writeTests = map[string]TextFmtContents{ + "": TextFmtContents{}, + + "foo\n": TextFmtContents{[]Text{ + Text{[]Span{ + Span{Format{}, "foo"}, + }}, + }}, + + "**foo\n": TextFmtContents{[]Text{ + Text{[]Span{ + Span{Format{Bold: true}, "foo"}, + }}, + }}, + + "foo **bar baz\n\nquux\n": TextFmtContents{[]Text{ + Text{[]Span{ + Span{Format{}, "foo "}, + Span{Format{Bold: true}, "bar baz"}, + }}, + Text{[]Span{ + Span{Format{}, "quux"}, + }}, + }}, + + "foo**bar``baz**quux\n\n" + + 
"\\ __qwe\\ __//\\ asd \\ //``zxc``**\\ \n\n" + + "//@@http://example.com exa//mple@@ @@href text@@// test\n": TextFmtContents{[]Text{ + Text{[]Span{ + Span{Format{}, "foo"}, + Span{Format{Bold: true}, "bar"}, + Span{Format{Bold: true, Monospace: true}, "baz"}, + Span{Format{Monospace: true}, "quux"}, + }}, + Text{[]Span{ + Span{Format{}, " "}, + Span{Format{Underline: true}, "qwe "}, + Span{Format{Italic: true}, " asd "}, + Span{Format{Monospace: true}, "zxc"}, + Span{Format{Bold: true}, " "}, + }}, + Text{[]Span{ + Span{Format{Italic: true, Link: "http://example.com"}, "exa"}, + Span{Format{Link: "http://example.com"}, "mple"}, + Span{Format{}, " "}, + Span{Format{Link: "href"}, "text"}, + Span{Format{Italic: true}, " test"}, + }}, + }}, + + "foo**bar\\*\\*baz\\*\\*quux**qweasd\n": TextFmtContents{[]Text{Text{[]Span{ + Span{Format{}, "foo"}, + Span{Format{Bold: true}, "bar**baz**quux"}, + Span{Format{}, "qweasd"}, + }}}}, +} + +func TestWriteTextFmt(t *testing.T) { + for k, v := range writeTests { + t.Run(k, func(t *testing.T) { + var buf bytes.Buffer + err := v.WriteIndent(&buf, 0) + if err != nil { + t.Fatalf("WriteIndent error: %v", err) + } + w := buf.String() + t.Log("expected:\n" + k) + t.Log(" got:\n" + w) + if k != w { + t.Fatalf("WriteIndent: output did not match expected document:\nexpected: %q\n got: %q", k, w) + } + }) + } +} + +func TestWriteParseTextFmt(t *testing.T) { + for k, v := range writeTests { + t.Run(k, func(t *testing.T) { + var buf bytes.Buffer + err := v.WriteIndent(&buf, 0) + if err != nil { + t.Fatalf("WriteIndent error: %v", err) + } + w := buf.String() + + if w == "" { + w = "\n" + } + parser := cnm.NewParser(strings.NewReader(w)) + err = parser.Next() + if err != nil && err != io.EOF { + t.Fatalf("error parsing %q: %v", w, err) + } + content, err := parseTextFmt(parser, cnm.TopLevel) + if err != nil && err != io.EOF { + t.Fatalf("error parsing %q: %v", w, err) + } + tf, ok := content.(TextFmtContents) + if !ok { + t.Fatalf("%q: 
expected type %T, got %T", w, v, content) + } + if !paragraphsEqual(v.Paragraphs, tf.Paragraphs) { + t.Fatalf("%q:\nexpected: %#v\n got: %#v", k, v, tf) + } + }) + } +} + +func TestParseWriteTextFmt(t *testing.T) { + for k, v := range writeTests { + t.Run(k, func(t *testing.T) { + s := k + if s == "" { + s = "\n" + } + parser := cnm.NewParser(strings.NewReader(s)) + err := parser.Next() + if err != nil && err != io.EOF { + t.Fatalf("error parsing %q: %v", k, err) + } + + content, err := parseTextFmt(parser, cnm.TopLevel) + if err != nil && err != io.EOF { + t.Fatalf("error parsing %q: %v", k, err) + } + tf, ok := content.(TextFmtContents) + if !ok { + t.Fatalf("%q: expected type %T, got %T", k, v, content) + } + if !paragraphsEqual(tf.Paragraphs, v.Paragraphs) { + t.Fatalf("%q: expected %#v, got %#v", k, v, tf) + } + + var buf bytes.Buffer + err = tf.WriteIndent(&buf, 0) + if err != nil { + t.Fatalf("WriteIndent error: %v", err) + } + + w := buf.String() + /*if w == "\n" { + k = "" + }*/ + + if k != w { + t.Fatalf("%q:\nexpected: %#v\n got: %#v", k, k, w) + } + }) + } +} diff --git a/content.go b/content.go new file mode 100644 index 0000000..971e9e3 --- /dev/null +++ b/content.go @@ -0,0 +1,610 @@ +package cnm + +import ( + "io" + "strings" +) + +func init() { + RegisterTextContentParser("", parseTextPlain) + RegisterTextContentParser("plain", parseTextPlain) + RegisterTextContentParser("pre", parseTextPre) +} + +// Block represents an arbitrary CNM within the "content" top-level block. +type Block interface { + // Name returns the name of the block. + Name() string + + // Args returns the block arguments. + Args() []string + WriteIndent(w io.Writer, n int) error +} + +// ContentBlock represents a block that holds other content blocks. +type ContentBlock struct { + name string + args []string + children []Block +} + +// WriteIndent writes the block header and its children indented by n tabs. 
+func (cb *ContentBlock) WriteIndent(w io.Writer, n int) error { + ss := []string{Escape(cb.name)} + ss = append(ss, cb.args...) + if err := writeIndent(w, JoinEscape(ss), n); err != nil { + return err + } + for _, ch := range cb.children { + if err := ch.WriteIndent(w, n+1); err != nil { + return err + } + } + return nil +} + +// NewContentBlock creates a new ContentBlock with a name and argument. +func NewContentBlock(name string, args ...string) *ContentBlock { + var a []string + for _, arg := range args { + if arg != "" { + a = append(a, arg) + } + } + return &ContentBlock{name: name, args: a} +} + +// Name returns the block's name. +func (cb *ContentBlock) Name() string { + return cb.name +} + +// Args returns the block arguments. +func (cb *ContentBlock) Args() []string { + return cb.args +} + +// Children returns the block's child blocks. +func (cb *ContentBlock) Children() []Block { + return cb.children +} + +// AppendChild adds a new child block to the end of the list of children. +func (cb *ContentBlock) AppendChild(block Block) { + cb.children = append(cb.children, block) +} + +func (cb *ContentBlock) parse(p *Parser, block *TokenBlock) (err error) { + for err == nil { + if !p.Empty() && p.Indent() <= block.Indent() { + break + } + + token := p.Block() + if blk, ok := token.(*TokenBlock); ok { + var b Block + switch blk.Name { + case "section": + b, err = parseContentSection(p, blk) + case "text": + b, err = parseContentText(p, blk) + case "raw": + b, err = parseContentRaw(p, blk) + case "list": + b, err = parseContentList(p, blk) + case "table": + b, err = parseContentTable(p, blk) + case "embed": + b, err = parseContentEmbed(p, blk) + default: + err = parseUnknown(p, blk) + } + if b != nil { + cb.AppendChild(b) + } + } else if err = p.Next(); err != nil { + break + } + } + return +} + +// SectionBlock represents a "section" content block. +type SectionBlock struct { + ContentBlock +} + +// NewSectionBlock creates a new SectionBlock with a title. 
+func NewSectionBlock(title string) *SectionBlock { + return &SectionBlock{*NewContentBlock("section", title)} +} + +// Title returns the section block's title. +func (b *SectionBlock) Title() string { return strings.Join(b.args, " ") } + +func parseContentSection(p *Parser, block *TokenBlock) (*SectionBlock, error) { + sec := NewSectionBlock(strings.Join(block.Args, " ")) + if err := p.Next(); err != nil { + return sec, err + } + return sec, sec.parse(p, block) +} + +// TextBlock represents a "text" content block. +type TextBlock struct { + // Format is the text format (first word of the block argument). + Format string + // Contents are the text contents. + Contents TextContents +} + +// NewTextBlock creates a new TextBlock containing arbitrary text contents. +func NewTextBlock(format string, contents TextContents) *TextBlock { + return &TextBlock{format, contents} +} + +// Name returns the block name "text". +func (t *TextBlock) Name() string { return "text" } + +// Args returns the block's arguments (format). +func (t *TextBlock) Args() []string { + if t.Format == "" { + return nil + } + return []string{t.Format} +} + +// WriteIndent writes the block header and its content indented by n tabs. 
+func (t *TextBlock) WriteIndent(w io.Writer, n int) error { + s := t.Name() + if t.Format != "" { + s += " " + Escape(t.Format) + } + if err := writeIndent(w, s, n); err != nil { + return err + } + if err := t.Contents.WriteIndent(w, n+1); err != nil { + return err + } + return nil +} + +func parseContentText(p *Parser, block *TokenBlock) (*TextBlock, error) { + format := "" + if len(block.Args) >= 1 { + format = block.Args[0] + } + tb := NewTextBlock(format, nil) + + if err := p.Next(); err != nil { + return tb, err + } + + var err error + tb.Contents, err = parseTextFormat(p, block, tb.Format) + + return tb, err +} + +func parseTextFormat(p *Parser, block *TokenBlock, format string) (TextContents, error) { + if parser := GetTextContentParser(format); parser != nil { + return parser(p, block) + } + r, err := parseContentRaw(p, block) + return TextPreContents{r.Contents}, err +} + +// TextContents represents the textual contents of a text block. +type TextContents interface { + WriteIndent(w io.Writer, n int) error +} + +// TextContentParser parses text content in a text block. +type TextContentParser func(p *Parser, block *TokenBlock) (TextContents, error) + +var textContentParsers = map[string]TextContentParser{} + +// GetTextContentParser retrieves a text content parser or nil if it doesn't +// exist. +func GetTextContentParser(name string) TextContentParser { + return textContentParsers[name] +} + +// RegisterTextContentParser registers a new text content parser for a format. +func RegisterTextContentParser(name string, parser TextContentParser) { + if parser == nil { + delete(textContentParsers, name) + } else { + textContentParsers[name] = parser + } +} + +// TextPlainContents represents a list of simple text paragraphs. +type TextPlainContents struct { + // Paragraphs is a list of simple text paragraphs. + Paragraphs []string +} + +// WriteIndent writes the plain text content indented by n tabs. 
+func (t TextPlainContents) WriteIndent(w io.Writer, n int) error { + for i, p := range t.Paragraphs { + if i != 0 { + if err := writeIndent(w, "", 0); err != nil { + return err + } + } + if err := writeIndent(w, Escape(p), n); err != nil { + return err + } + } + return nil +} + +// NewTextPlainBlock creates a new TextBlock containing TextPlainContents. +func NewTextPlainBlock(paragraphs []string) *TextBlock { + par := make([]string, len(paragraphs)) + copy(par, paragraphs) + return NewTextBlock("", TextPlainContents{par}) +} + +func parseTextPlain(p *Parser, block *TokenBlock) (TextContents, error) { + txt := TextPlainContents{} + paragraph := "" + var err error + for err == nil { + if !p.Empty() && p.Indent() <= block.Indent() { + break + } + + token := p.SimpleText() + end := false + if text, ok := token.(*TokenSimpleText); ok { + if text.Text == "" { + end = true + } else if paragraph == "" { + paragraph = text.Text + } else { + paragraph += " " + text.Text + } + } else if _, ok := token.(*TokenEmptyLine); ok && paragraph != "" { + end = true + } + if end { + txt.Paragraphs = append(txt.Paragraphs, paragraph) + paragraph = "" + } + err = p.Next() + } + if paragraph != "" { + txt.Paragraphs = append(txt.Paragraphs, paragraph) + } + return txt, err +} + +// TextPreContents represents preformatted contents of a text block. +type TextPreContents struct { + // Text is the preformatted content. + Text string +} + +// WriteIndent writes the preformatted text content indented by n tabs. +func (t TextPreContents) WriteIndent(w io.Writer, n int) error { + ss := strings.Split(t.Text, "\n") + for _, s := range ss { + if err := writeIndent(w, EscapeNonspace(s), n); err != nil { + return err + } + } + return nil +} + +// NewTextPreBlock creates a new TextBlock containing TextPreContents. 
+func NewTextPreBlock(text string) *TextBlock { + return NewTextBlock("", TextPreContents{text}) +} + +func parseTextPre(p *Parser, block *TokenBlock) (TextContents, error) { + var lines []string + var ls []string + var err error + for err == nil { + if !p.Empty() && p.Indent() <= block.Indent() { + break + } + + token := p.RawText() + if text, ok := token.(*TokenRawText); ok { + if len(ls) > 0 { + lines = append(lines, ls...) + ls = ls[:0] + } + lines = append(lines, Unescape(text.Text)) + } else if _, ok := token.(*TokenEmptyLine); ok && len(lines) > 0 { + ls = append(ls, "") + } + err = p.Next() + } + return TextPreContents{strings.Join(lines, "\n")}, err +} + +// RawBlock represents a "raw" content block. +type RawBlock struct { + // Syntax is the syntax of the block contents (first word of block argument) + Syntax string + + // Contents is the raw content. + Contents string +} + +// Name returns the block name "raw". +func (r *RawBlock) Name() string { return "raw" } + +// Args returns the block's arguments (syntax). +func (r *RawBlock) Args() []string { + if r.Syntax == "" { + return nil + } + return []string{r.Syntax} +} + +// WriteIndent writes the raw content indented by n tabs. 
+func (r *RawBlock) WriteIndent(w io.Writer, n int) error { + s := r.Name() + if r.Syntax != "" { + s += " " + Escape(r.Syntax) + } + if err := writeIndent(w, s, n); err != nil { + return err + } + if r.Contents != "" { + ss := strings.Split(r.Contents, "\n") + for _, s := range ss { + if err := writeIndent(w, s, n+1); err != nil { + return err + } + } + } + return nil +} + +func parseContentRaw(p *Parser, block *TokenBlock) (*RawBlock, error) { + arg := "" + if len(block.Args) > 0 { + arg = block.Args[0] + } + rb := &RawBlock{arg, ""} + + if err := p.Next(); err != nil { + return rb, err + } + + var lines []string + var ls []string + var err error + for err == nil { + if !p.Empty() && p.Indent() <= block.Indent() { + break + } + + token := p.RawText() + if text, ok := token.(*TokenRawText); ok { + if len(ls) > 0 { + lines = append(lines, ls...) + ls = ls[:0] + } + lines = append(lines, text.Text) + } else if _, ok := token.(*TokenEmptyLine); ok && len(lines) > 0 { + ls = append(ls, "") + } + err = p.Next() + } + rb.Contents = strings.Join(lines, "\n") + + return rb, err +} + +// ListBlock represents a "list" content block. +type ListBlock struct { + ContentBlock +} + +// NewListBlock creates a new ListBlock. +// +// If the ordered parameter is true, the list is created in "ordered" mode. +func NewListBlock(ordered bool) *ListBlock { + arg := "" + if ordered { + arg = "ordered" + } + return &ListBlock{*NewContentBlock("list", arg)} +} + +// Ordered returns true if the list is in ordered mode (first word of the +// block argument is "ordered"). +func (b *ListBlock) Ordered() bool { + return len(b.args) >= 1 && b.args[0] == "ordered" +} + +func parseContentList(p *Parser, block *TokenBlock) (*ListBlock, error) { + list := NewListBlock(false) + list.args = block.Args + if err := p.Next(); err != nil { + return list, err + } + return list, list.parse(p, block) +} + +// TableBlock represents a "table" content block. 
+type TableBlock struct { + rows []Block +} + +// NewTableBlock creates a new TableBlock. +func NewTableBlock() *TableBlock { + return &TableBlock{} +} + +// Name returns the block name "table". +func (t *TableBlock) Name() string { + return "table" +} + +// Args returns the block's nil arguments. +func (t *TableBlock) Args() []string { + return nil +} + +// WriteIndent writes the table header and contents indented by n tabs. +func (t *TableBlock) WriteIndent(w io.Writer, n int) error { + if err := writeIndent(w, t.Name(), n); err != nil { + return err + } + for _, row := range t.rows { + if err := row.WriteIndent(w, n+1); err != nil { + return err + } + } + return nil +} + +// Rows returns the table's rows. +func (t *TableBlock) Rows() []Block { + return t.rows +} + +// AppendRow adds a new row to the end of the table. +func (t *TableBlock) AppendRow(row Block) { + t.rows = append(t.rows, row) +} + +func (t *TableBlock) parse(p *Parser, block *TokenBlock) (err error) { + for err == nil { + if !p.Empty() && p.Indent() <= block.Indent() { + break + } + + token := p.Block() + if blk, ok := token.(*TokenBlock); ok { + var b Block + switch blk.Name { + case "row": + b, err = parseTableRow(p, blk) + case "header": + b, err = parseTableHeader(p, blk) + default: + err = parseUnknown(p, blk) + } + if b != nil { + t.AppendRow(b) + } + } else if err = p.Next(); err != nil { + break + } + } + return +} + +func parseContentTable(p *Parser, block *TokenBlock) (*TableBlock, error) { + table := NewTableBlock() + if err := p.Next(); err != nil { + return table, err + } + return table, table.parse(p, block) +} + +// RowBlock represents a "row" table block. +type RowBlock struct { + ContentBlock +} + +// NewRowBlock creates a new RowBlock. 
+func NewRowBlock() *RowBlock { + return &RowBlock{*NewContentBlock("row", "")} +} + +func parseTableRow(p *Parser, block *TokenBlock) (*RowBlock, error) { + row := NewRowBlock() + if err := p.Next(); err != nil { + return row, err + } + return row, row.parse(p, block) +} + +// HeaderBlock represents a "header" table block. +type HeaderBlock struct { + ContentBlock +} + +// NewHeaderBlock creates a new HeaderBlock. +func NewHeaderBlock() *HeaderBlock { + return &HeaderBlock{*NewContentBlock("header", "")} +} + +func parseTableHeader(p *Parser, block *TokenBlock) (*HeaderBlock, error) { + hdr := NewHeaderBlock() + if err := p.Next(); err != nil { + return hdr, err + } + return hdr, hdr.parse(p, block) +} + +// EmbedBlock represents an "embed" content block. +type EmbedBlock struct { + // Type is the content type (first word of block argument). + Type string + + // URL is the content URL (second word of the block argument). + URL string + + // Description is the content description (block body as simple text). + Description string +} + +// Name returns the block name "embed". +func (e *EmbedBlock) Name() string { return "embed" } + +// Args returns the block argument (type and URL). +func (e *EmbedBlock) Args() []string { + if e.Type != "" && e.URL != "" { + return []string{e.Type, e.URL} + } + return []string{e.Type} +} + +// WriteIndent writes the embed block header and contents indented by n tabs. 
+func (e *EmbedBlock) WriteIndent(w io.Writer, n int) error { + if e.URL == "" { + return nil + } + + s := e.Name() + " " + if e.Type == "" { + s += "*/*" + } else { + s += Escape(e.Type) + } + s += " " + Escape(e.URL) + if err := writeIndent(w, s, n); err != nil { + return err + } + if err := writeIndent(w, Escape(e.Description), n+1); err != nil { + return err + } + return nil +} + +func parseContentEmbed(p *Parser, block *TokenBlock) (*EmbedBlock, error) { + embed := &EmbedBlock{} + if len(block.Args) >= 1 { + embed.Type = block.Args[0] + if len(block.Args) >= 2 { + embed.URL = block.Args[1] + } + } + if err := p.Next(); err != nil { + return embed, err + } + s, err := getSimpleText(p, block) + embed.Description = s + return embed, err +} diff --git a/document.go b/document.go new file mode 100644 index 0000000..b5cdbe5 --- /dev/null +++ b/document.go @@ -0,0 +1,278 @@ +// Package cnm implements CNM document parsing and composition. +package cnm // import "contnet.org/lib/cnm-go" + +import ( + "bufio" + "io" + "path" + "strings" +) + +// Document represents a CNM document. +type Document struct { + // Title is the document title (top-level "title" block). + Title string + + // Links is a list of document-level hyperlinks (top-level "links" block). + Links []Link + + // Site is a sitemap (top-level "site" block). + Site Site + + // Content is the document content (top-level "content" block). + Content *ContentBlock +} + +// ParseDocument parses a CNM document from r. 
+func ParseDocument(r io.Reader) (doc *Document, err error) { + p := NewParser(r) + doc = &Document{} + err = p.Next() + for err == nil { + token := p.Block() + if err = p.Next(); err != nil { + break + } + if blk, ok := token.(*TokenBlock); ok { + switch blk.Name { + case "title": + err = doc.parseTitle(p, blk) + case "links": + err = doc.parseLinks(p, blk) + case "site": + err = doc.Site.parse(p, blk) + case "content": + if doc.Content == nil { + doc.Content = &ContentBlock{name: "content"} + } + err = doc.Content.parse(p, blk) + default: + // discard lines inside this block + for err == nil { + if !p.Empty() && p.Indent() <= blk.Indent() { + break + } + err = p.Next() + } + } + } + } + if err == io.EOF { + err = nil + } + return +} + +func (doc *Document) Write(w io.Writer) error { + bw := bufio.NewWriter(w) + if doc.Title != "" { + if err := writeIndent(bw, "title", 0); err != nil { + return err + } + if err := writeIndent(bw, Escape(doc.Title), 1); err != nil { + return err + } + } + if len(doc.Links) > 0 { + if err := writeIndent(bw, "links", 0); err != nil { + return err + } + for _, link := range doc.Links { + if err := link.WriteIndent(bw, 1); err != nil { + return err + } + } + } + if len(doc.Site.Children) > 0 { + if err := writeIndent(bw, "site", 0); err != nil { + return err + } + for _, site := range doc.Site.Children { + if err := site.WriteIndent(bw, 1); err != nil { + return err + } + } + } + if doc.Content != nil { + if err := doc.Content.WriteIndent(bw, 0); err != nil { + return err + } + } + return bw.Flush() +} + +func (doc *Document) parseTitle(p *Parser, block *TokenBlock) (err error) { + s, err := getSimpleText(p, block) + if doc.Title == "" { + doc.Title = s + } else { + doc.Title += " " + s + } + return +} + +func (doc *Document) parseLinks(p *Parser, block *TokenBlock) (err error) { + for err == nil { + if !p.Empty() && p.Indent() <= block.Indent() { + break + } + + token := p.Block() + if blk, ok := token.(*TokenBlock); ok { + if 
blk.Name == "" { + err = parseUnknown(p, blk) + } else { + link := Link{ + URL: blk.Name, + Name: strings.Join(blk.Args, " "), + } + doc.Links = append(doc.Links, link) + if err = p.Next(); err != nil { + break + } + doc.Links[len(doc.Links)-1].Description, err = getSimpleText(p, blk) + } + } + } + return +} + +func getSimpleText(p *Parser, block *TokenBlock) (s string, err error) { + for err == nil { + if !p.Empty() && p.Indent() <= block.Indent() { + break + } + + token := p.SimpleText() + if text, ok := token.(*TokenSimpleText); ok && text.Text != "" { + if s == "" { + s = text.Text + } else { + s += " " + text.Text + } + } + + err = p.Next() + } + return +} + +// Link represents a document-level hyperlink in the "links" top-level block. +type Link struct { + // URL is the hyperlink URL. + URL string + + // Name is the hyperlink text. + Name string + + // Description is the description of the hyperlink. + Description string +} + +// WriteIndent writes the link URL, name and description indented by n tabs. +func (link Link) WriteIndent(w io.Writer, n int) error { + s := Escape(link.URL) + if link.Name != "" { + s += " " + Escape(link.Name) + } + if err := writeIndent(w, s, n); err != nil { + return err + } + if link.Description != "" { + if err := writeIndent(w, Escape(link.Description), n+1); err != nil { + return err + } + } + return nil +} + +// Site represents a node in the sitemap in the "site" top-level block. +type Site struct { + // Path is the node's path fragment. + Path string + + // Name is the node's name. + Name string + + // Children are the nodes below this node. + Children []Site +} + +// WriteIndent writes the sitemap indented by n tabs. 
+func (site Site) WriteIndent(w io.Writer, n int) error { + s := Escape(site.Path) + if site.Name != "" { + s += " " + Escape(site.Name) + } + if err := writeIndent(w, s, n); err != nil { + return err + } + for _, ch := range site.Children { + if err := ch.WriteIndent(w, n+1); err != nil { + return err + } + } + return nil +} + +func (site *Site) parse(p *Parser, block *TokenBlock) (err error) { + for err == nil { + if !p.Empty() && p.Indent() <= block.Indent() { + break + } + + token := p.Block() + if blk, ok := token.(*TokenBlock); ok { + if blk.Name == "" { + err = parseUnknown(p, blk) + } else { + s := Site{ + Path: strings.Trim(path.Clean(blk.Name), "/"), + Name: strings.Join(blk.Args, " "), + } + site.Children = append(site.Children, s) + if err = p.Next(); err != nil { + break + } + err = site.Children[len(site.Children)-1].parse(p, blk) + } + } else { + err = p.Next() + } + } + return +} + +func parseUnknown(p *Parser, block *TokenBlock) (err error) { + err = p.Next() + for err == nil { + if !p.Empty() && p.Indent() <= block.Indent() { + break + } + // discard lines inside this block + err = p.Next() + } + return +} + +func writeIndent(w io.Writer, s string, depth int) error { + const tabs = "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t" + + if s == "" { + _, err := w.Write([]byte{'\n'}) + return err + } + if depth == 0 { + _, err := w.Write([]byte(s + "\n")) + return err + } + + var ind string + if depth <= len(tabs) { + ind = tabs[:depth] + } else { + ind = strings.Repeat("\t", depth) + } + _, err := w.Write([]byte(ind + s + "\n")) + return err +} diff --git a/parse.go b/parse.go new file mode 100644 index 0000000..02f7eb2 --- /dev/null +++ b/parse.go @@ -0,0 +1,189 @@ +package cnm + +import ( + "bufio" + "io" +) + +// TopLevel represents the top-level block. +var TopLevel = &TokenBlock{ + TokenLine: TokenLine{ + Indentation: -1, + RawLine: "", + LineNo: 0, + }, + Parent: nil, + Name: "", + Args: nil, +} + +// Parser parses a CNM document by lines. 
type Parser struct {
	// r is the buffered input the document is read from.
	r *bufio.Reader
	// line is the number of lines read so far.
	line int
	// block is the block header that currently encloses the parser position;
	// it starts out as TopLevel.
	block *TokenBlock
	// current is the line most recently read by Next, or nil before the
	// first call.
	current *TokenLine
	// end records that the synthetic empty line emitted at end of input has
	// already been produced.
	end bool
}

// NewParser creates a new Parser that reads from r.
func NewParser(r io.Reader) *Parser {
	return &Parser{
		r:       bufio.NewReader(r),
		line:    0,
		block:   TopLevel,
		current: nil,
		end:     false,
	}
}

// Line returns the number of the last parsed line in the document, starting
// with 1 after the first line.
func (p *Parser) Line() int {
	return p.line
}

// Next retrieves the next line.
//
// The line's indentation is the number of leading tabs, capped at one level
// deeper than the enclosing block; tabs beyond the cap stay part of the line
// content. If the new line is non-empty and not indented deeper than the
// enclosing block, the parser steps out of that block by one level.
//
// Returns the error from the underlying reader, including io.EOF once the
// input is exhausted; in that case the current line is left unchanged.
func (p *Parser) Next() error {
	line, err := p.nextLine()
	if err != nil {
		return err
	}
	// Count leading tabs.
	indent := 0
	for _, c := range line {
		if c != '\t' {
			break
		}
		indent++
	}
	// A line can be at most one level deeper than the block it sits in.
	if indent > p.block.Indent()+1 {
		indent = p.block.Indent() + 1
	}
	p.current = &TokenLine{
		Indentation: indent,
		RawLine:     line,
		LineNo:      p.line,
	}
	// A non-empty line at or above the enclosing block's indentation closes
	// that block. Only one level is popped per call.
	if p.current.Indent() <= p.block.Indent() && !p.Empty() {
		p.block = p.block.Parent
	}
	return nil
}

// Indent returns the indentation of the current line.
//
// Returns -1 if no line has been read yet.
func (p *Parser) Indent() int {
	if p.current == nil {
		return -1
	}
	return p.current.Indent()
}

// Empty returns true if the current line is empty, that is, if it consists
// of nothing but its indentation tabs. It also returns true if no line has
// been read yet.
func (p *Parser) Empty() bool {
	if p.current == nil {
		return true
	}
	// The indentation accounts for the whole raw line: nothing follows it.
	if p.current.Indent() == len(p.current.Raw()) {
		return true
	}
	return false
}

// Block parses the current line in block mode.
//
// Returns a TokenBlock if the line was not empty, otherwise TokenEmptyLine. In
// block mode, a line is empty even if its indentation exceeds the block
// content indentation, as long as it only contains tab characters.
//
// Next() must have been called before calling Block().
+func (p *Parser) Block() Token { + line := p.current.Raw()[p.current.Indent():] + + /*indent := 0 + for _, c := range line { + if c != '\t' { + break + } + indent++ + } + if len(line) == indent { + return &TokenEmptyLine{*p.current} + }*/ + + ss := SplitUnescape(line) + if len(ss) == 0 || len(ss) == 1 && ss[0] == "" { + return &TokenEmptyLine{*p.current} + } + + block := TokenBlock{ + TokenLine: *p.current, + Parent: p.block, + } + block.Name = ss[0] + if len(ss) > 1 { + block.Args = ss[1:] + } + + p.block = &block + + return &block +} + +// RawText parses the current line as raw text. +// +// Returns a TokenRawText if the line was not empty, otherwise +// TokenEmptyLine. +// +// Next() must have been called before calling RawText(). +func (p *Parser) RawText() Token { + if p.Empty() { + return &TokenEmptyLine{*p.current} + } + return &TokenRawText{ + TokenLine: *p.current, + Text: p.current.Raw()[p.current.Indent():], + } +} + +// SimpleText parses the current line as simple text. +// +// Returns a TokenSimpleText if the line was not empty, otherwise +// TokenEmptyLine. +// +// Next() must have been called before calling SimpleText(). 
+func (p *Parser) SimpleText() Token { + if p.Empty() { + return &TokenEmptyLine{*p.current} + } + return &TokenSimpleText{ + TokenLine: *p.current, + Text: ParseSimpleText(p.current.Raw()[p.current.Indent():]), + } +} + +func (p *Parser) nextLine() (string, error) { + l, err := p.r.ReadString('\n') + if err == io.EOF { + if l != "" { + err = nil + } else if !p.end { // XXX + l = "\n" + p.end = true + err = nil + } + } + rs := make([]rune, len(l)) + ri := 0 + for _, r := range l { + switch r { + case '\n', '\r', '\x00': + continue + } + rs[ri] = r + ri++ + } + p.line++ + return string(rs[:ri]), err +} diff --git a/parse_test.go b/parse_test.go new file mode 100644 index 0000000..b47dc14 --- /dev/null +++ b/parse_test.go @@ -0,0 +1,701 @@ +package cnm + +import ( + "reflect" + "strings" + "testing" + + "github.com/davecgh/go-spew/spew" +) + +var parseTests = map[string]*Document{ + "": &Document{}, + + "foo\n\tbar\ntitle\n\ttest": &Document{ + Title: "test", + }, + + "foo\n\tbar\ntitle\n\ttest\nfoo\n\tbar": &Document{ + Title: "test", + }, + + "title\n\ttest": &Document{ + Title: "test", + }, + + "\ntitle\n\ttest\n": &Document{ + Title: "test", + }, + + "title\n\ttest\n\n": &Document{ + Title: "test", + }, + + "\ntitle\n\t\t\t\t\t\n\t\tfoo bar": &Document{ + Title: "foo bar", + }, + + "site\n\tfoo\nsite\n\t\tbar\n": &Document{ + Site: Site{Children: []Site{ + Site{Path: "foo"}, + }}, + }, + + "content\n\ttext\n\t\tfoo\ncontent\n\t\tbar\n": &Document{ + Content: &ContentBlock{ + name: "content", + children: []Block{ + &TextBlock{ + Format: "", + Contents: TextPlainContents{ + Paragraphs: []string{"foo"}, + }, + }, + }, + }, + }, + + "\n\ttitle\n\t\t\t\t\t\n\tfoo bar": &Document{}, + + "\ttitle\n\t\tfoo\n": &Document{}, + + "\tsite\n\t\tfoo\n": &Document{}, + + "\tlinks\n\t\tfoo\n": &Document{}, + + "links\n\tfoo": &Document{ + Links: []Link{ + Link{ + URL: "foo", + }, + }, + }, + + "qwe\ntitle\n\tasd": &Document{ + Title: "asd", + }, + + "links\n\t qwe\n\tasd": 
&Document{ + Links: []Link{ + Link{URL: "asd"}, + }, + }, + + "site\n\t qwe\n\tasd": &Document{ + Site: Site{Children: []Site{ + Site{Path: "asd"}, + }}, + }, + + "site\n\tba\\nr": &Document{ + Site: Site{ + Children: []Site{ + Site{ + Path: "ba\nr", + }, + }, + }, + }, + + "site\n\t\t\t\tba\\nr": &Document{}, + + "site\n\tfoo\tbar": &Document{ + Site: Site{ + Children: []Site{ + Site{ + Path: "foo", + Name: "bar", + }, + }, + }, + }, + + "\t\tsite\n\t\t\t\tfoo": &Document{}, + + "\tsite\n\tbar": &Document{}, + + "content\n\tsection test\n": &Document{ + Content: &ContentBlock{ + name: "content", + args: nil, + children: []Block{ + &SectionBlock{ContentBlock{ + name: "section", + args: []string{"test"}, + }}, + }, + }, + }, + + "content\n\tnosuchblock\n\tsection test\n": &Document{ + Content: &ContentBlock{ + name: "content", + args: nil, + children: []Block{ + &SectionBlock{ContentBlock{ + name: "section", + args: []string{"test"}, + }}, + }, + }, + }, + + "content\n\tnosuchblock\n\tsection test\n\n\tnosuchblock2": &Document{ + Content: &ContentBlock{ + name: "content", + args: nil, + children: []Block{ + &SectionBlock{ContentBlock{ + name: "section", + args: []string{"test"}, + }}, + }, + }, + }, + + "content\n\tsection\n\t\tnosuchblock\n\t\tsection\n\t\t\ttext\n\t\t\t\ttest": &Document{ + Content: &ContentBlock{ + name: "content", + args: nil, + children: []Block{ + &SectionBlock{ContentBlock{ + name: "section", + args: nil, + children: []Block{ + &SectionBlock{ContentBlock{ + name: "section", + args: nil, + children: []Block{ + &TextBlock{ + Format: "", + Contents: TextPlainContents{Paragraphs: []string{ + "test", + }}, + }, + }, + }}, + }, + }}, + }, + }, + }, + + "content\n\ttable\n\t\tnosuchblock\n\t\trow\n\t\t\ttext\n\t\t\t\ttest": &Document{ + Content: &ContentBlock{ + name: "content", + args: nil, + children: []Block{ + &TableBlock{rows: []Block{ + &RowBlock{ContentBlock{ + name: "row", + args: nil, + children: []Block{ + &TextBlock{ + Format: "", + 
Contents: TextPlainContents{Paragraphs: []string{ + "test", + }}, + }, + }, + }}, + }}, + }, + }, + }, + + "site\n\t\ttest\n\tfoo\\ bar baz\n" + + "links\n\tfoo\\ bar baz\n\t\t\tquux\n" + + "content\n" + + "links\n\t\ttest\n" + + "content\n" + + "\tsection\n\tsection qweasd\n" + + "\tsection foo\\ bar baz\n\t\ttext\n\t\t\ttest\n": &Document{ + Site: Site{ + Children: []Site{ + Site{ + Path: "foo bar", + Name: "baz", + }, + }, + }, + Links: []Link{ + Link{ + URL: "foo bar", + Name: "baz", + Description: "quux", + }, + }, + Content: &ContentBlock{ + name: "content", + args: nil, + children: []Block{ + &SectionBlock{ContentBlock{ + name: "section", + args: nil, + }}, + &SectionBlock{ContentBlock{ + name: "section", + args: []string{"qweasd"}, + }}, + &SectionBlock{ContentBlock{ + name: "section", + args: []string{"foo bar baz"}, + children: []Block{ + &TextBlock{ + Contents: TextPlainContents{[]string{ + "test", + }}, + }, + }, + }}, + }, + }, + }, + + "title\n\tfoo bar\n" + + "links\n\tqwe asd\n\tzxc 123\n" + + "site\n\tfoo\n\t\tbar\n\t\tbaz/quux\n\t\t\t123\n" + + "title\n\tbaz\n" + + "links\n\tfoo\n" + + "site\n\ttest": &Document{ + Title: "foo bar baz", + Links: []Link{ + Link{"qwe", "asd", ""}, + Link{"zxc", "123", ""}, + Link{"foo", "", ""}, + }, + Site: Site{ + Children: []Site{ + Site{ + Path: "foo", + Children: []Site{ + Site{Path: "bar"}, + Site{ + Path: "baz/quux", + Children: []Site{ + Site{Path: "123"}, + }, + }, + }, + }, + Site{Path: "test"}, + }, + }, + }, + + ` +thing stuff + whatever +title blah + + + Test + +title + document + +content + section Test section + text + This is \n just a + text pre + + t e \n s t + + + + preformatted text + + raw text/plain + of various \n features + + section of\ the + table + header + text + Column 1 + text + Column 2 + row + text + CNM + text + document + + format + row + section + lorem + list + text + ipsum + list ordered + list unordered + text + dolor + + sit + amet + embed text/cnm cnp://example.com/ stuff + 
thing + whatever +`: &Document{ + Title: "Test document", + Content: &ContentBlock{ + name: "content", + args: nil, + children: []Block{ + &SectionBlock{ContentBlock{ + name: "section", + args: []string{"Test section"}, + children: []Block{ + &TextBlock{ + Contents: TextPlainContents{[]string{ + "This is \n just a", + }}, + }, + &TextBlock{ + Format: "pre", + Contents: TextPreContents{ + " t e \n s t \n\t\n\n\npreformatted text", + }, + }, + &RawBlock{ + Syntax: "text/plain", + Contents: "of various \\n features", + }, + &SectionBlock{ContentBlock{ + name: "section", + args: []string{"of the"}, + children: []Block{ + &TableBlock{[]Block{ + &HeaderBlock{ContentBlock{ + name: "header", + args: nil, + children: []Block{ + &TextBlock{ + Contents: TextPlainContents{[]string{ + "Column 1", + }}, + }, + &TextBlock{ + Contents: TextPlainContents{[]string{ + "Column 2", + }}, + }, + }, + }}, + &RowBlock{ContentBlock{ + name: "row", + args: nil, + children: []Block{ + &TextBlock{ + Contents: TextPlainContents{[]string{ + "CNM", + }}, + }, + &TextBlock{ + Contents: TextPlainContents{[]string{ + "document", + "format", + }}, + }, + }, + }}, + &RowBlock{ContentBlock{ + name: "row", + args: nil, + children: []Block{ + &SectionBlock{ContentBlock{ + name: "section", + args: nil, + }}, + &ListBlock{ContentBlock{ + name: "list", + args: nil, + children: []Block{ + &TextBlock{ + Contents: TextPlainContents{[]string{ + "ipsum", + }}, + }, + &ListBlock{ContentBlock{ + name: "list", + args: []string{"ordered"}, + children: []Block{ + &ListBlock{ContentBlock{ + name: "list", + args: []string{"unordered"}, + children: []Block{ + &TextBlock{ + Contents: TextPlainContents{[]string{ + "dolor", + "sit amet", + }}, + }, + }, + }}, + }, + }}, + }, + }}, + }, + }}, + }}, + }, + }}, + }, + }}, + &EmbedBlock{ + Type: "text/cnm", + URL: "cnp://example.com/", + Description: "thing whatever", + }, + }, + }, + }, +} + +func TestParse(t *testing.T) { + for k, v := range parseTests { + t.Run(k, func(t 
*testing.T) { + d, err := ParseDocument(strings.NewReader(k)) + if err != nil { + t.Fatalf("ParseDocument(%q): error: %v", k, err) + } + if !documentEqual(d, v) { + t.Fatalf("ParseDocument(%q):\nexpected:\n%s\n got:\n%s", k, reprDoc(v), reprDoc(d)) + } + }) + } +} + +func reprDoc(d *Document) string { + //return fmt.Sprintf("Document{Title: %q, Links: %+v, Site: %+v, Content: %s}", d.Title, d.Links, d.Site, reprContent(d.Content)) + return spew.Sdump(d) +} + +func documentEqual(a, b *Document) bool { + if a.Title != b.Title { + return false + } + if len(a.Links) != len(b.Links) { + return false + } + for i := range a.Links { + if !linkEqual(a.Links[i], b.Links[i]) { + return false + } + } + if !siteEqual(a.Site, b.Site) { + return false + } + if !contentBlockEqual(a.Content, b.Content) { + return false + } + return true +} + +func linkEqual(a, b Link) bool { + return a == b +} + +func siteEqual(a, b Site) bool { + if a.Path != b.Path { + return false + } + if a.Name != b.Name { + return false + } + if len(a.Children) != len(b.Children) { + return false + } + for i := range a.Children { + if !siteEqual(a.Children[i], b.Children[i]) { + return false + } + } + return true +} + +func blockEqual(a, b Block) bool { + switch va := a.(type) { + case *SectionBlock: + vb, ok := b.(*SectionBlock) + if !ok { + return false + } + return sectionBlockEqual(va, vb) + + case *TextBlock: + vb, ok := b.(*TextBlock) + if !ok { + return false + } + return textBlockEqual(va, vb) + + case *RawBlock: + vb, ok := b.(*RawBlock) + if !ok { + return false + } + return rawBlockEqual(va, vb) + + case *ListBlock: + vb, ok := b.(*ListBlock) + if !ok { + return false + } + return listBlockEqual(va, vb) + + case *TableBlock: + vb, ok := b.(*TableBlock) + if !ok { + return false + } + return tableBlockEqual(va, vb) + + case *HeaderBlock: + vb, ok := b.(*HeaderBlock) + if !ok { + return false + } + return headerBlockEqual(va, vb) + + case *RowBlock: + vb, ok := b.(*RowBlock) + if !ok { + return false 
+ } + return rowBlockEqual(va, vb) + + case *EmbedBlock: + vb, ok := b.(*EmbedBlock) + if !ok { + return false + } + return embedBlockEqual(va, vb) + + case *ContentBlock: + vb, ok := b.(*ContentBlock) + if !ok { + return false + } + return contentBlockEqual(va, vb) + + default: // shouldn't happen + return false + } +} + +func contentBlockEqual(a, b *ContentBlock) bool { + if (a == nil) != (b == nil) { + return false + } + if a == nil { + return true + } + if a.Name() != b.Name() { + return false + } + aa, ba := a.Args(), b.Args() + if len(aa) != len(ba) { + return false + } + for i := range aa { + if aa[i] != ba[i] { + return false + } + } + ca, cb := a.Children(), b.Children() + if len(ca) != len(cb) { + return false + } + for i := range ca { + if !blockEqual(ca[i], cb[i]) { + return false + } + } + return true +} + +func sectionBlockEqual(a, b *SectionBlock) bool { + return contentBlockEqual(&a.ContentBlock, &b.ContentBlock) +} + +func textBlockEqual(a, b *TextBlock) bool { + if a.Format != b.Format { + return false + } + return textContentsEqual(a.Contents, b.Contents) +} + +func textContentsEqual(a, b TextContents) bool { + switch va := a.(type) { + case TextPlainContents: + vb, ok := b.(TextPlainContents) + if !ok { + return false + } + return textPlainContentsEqual(va, vb) + + case TextPreContents: + vb, ok := b.(TextPreContents) + if !ok { + return false + } + return textPreContentsEqual(va, vb) + + default: + return reflect.TypeOf(a) == reflect.TypeOf(b) && reflect.DeepEqual(a, b) + } +} + +func textPlainContentsEqual(a, b TextPlainContents) bool { + if len(a.Paragraphs) != len(b.Paragraphs) { + return false + } + for i := range a.Paragraphs { + if a.Paragraphs[i] != b.Paragraphs[i] { + return false + } + } + return true +} + +func textPreContentsEqual(a, b TextPreContents) bool { + return a == b +} + +func rawBlockEqual(a, b *RawBlock) bool { + return *a == *b +} + +func listBlockEqual(a, b *ListBlock) bool { + return contentBlockEqual(&a.ContentBlock, 
&b.ContentBlock) +} + +func tableBlockEqual(a, b *TableBlock) bool { + ra, rb := a.Rows(), b.Rows() + if len(ra) != len(rb) { + return false + } + for i := range ra { + if !blockEqual(ra[i], rb[i]) { + return false + } + } + return true +} + +func rowBlockEqual(a, b *RowBlock) bool { + return contentBlockEqual(&a.ContentBlock, &b.ContentBlock) +} + +func headerBlockEqual(a, b *HeaderBlock) bool { + return contentBlockEqual(&a.ContentBlock, &b.ContentBlock) +} + +func embedBlockEqual(a, b *EmbedBlock) bool { + return *a == *b +} diff --git a/simpletext.go b/simpletext.go new file mode 100644 index 0000000..78e089e --- /dev/null +++ b/simpletext.go @@ -0,0 +1,196 @@ +package cnm + +import ( + "bytes" + "regexp" + "strconv" + "strings" +) + +// ParseSimpleText parses raw as simple text (collapses whitespace and resolves +// escape sequences). +func ParseSimpleText(raw string) string { + return Unescape(CollapseWhitespace(raw)) +} + +// CollapseWhitespace collapses sequences of non-escaped whitespace in raw CNM +// simple text into single spaces. 
func CollapseWhitespace(raw string) string {
	// Tabs, line feeds, form feeds and carriage returns become plain spaces.
	s := strings.Join(strings.FieldsFunc(raw, func(r rune) bool {
		switch r {
		case '\t', '\n', '\f', '\r':
			return true
		}
		return false
	}), " ")

	var buf bytes.Buffer
	first := true   // nothing has been emitted yet: suppress leading space
	escape := false // previous rune was an unpaired backslash
	space := false  // one or more unescaped spaces are pending
	for _, r := range s {
		switch r {
		case '\\':
			if escape {
				buf.WriteString("\\\\")
				escape = false
			} else {
				escape = true
			}
			if space && !first {
				buf.WriteByte(' ')
			}
			space = false
			first = false
		case ' ':
			if escape {
				// Escaped spaces are significant and are kept as written.
				buf.WriteString("\\ ")
				escape = false
			} else {
				space = true
			}
		default:
			if escape {
				buf.WriteByte('\\')
			}
			if space && !first {
				buf.WriteByte(' ')
			}
			buf.WriteRune(r)
			escape = false
			space = false
			first = false
		}
	}
	// A trailing lone backslash is kept; pending trailing spaces are dropped.
	if escape {
		buf.WriteByte('\\')
	}
	return buf.String()
}

// escapeRe matches everything Escape rewrites: tab, line feed, form feed,
// carriage return, backslash, U+0000, a leading space, a trailing space, and
// a pair of consecutive spaces (the final alternative is two spaces).
var escapeRe = regexp.MustCompile(`[\t\n\f\r\\\x00]|^ | $|  `)

// Escape escapes whitespace, backslash and U+0000 within s.
//
// Single interior spaces are left alone. A leading space, a trailing space
// and the second space of each consecutive pair are escaped so that they
// survive whitespace collapsing.
func Escape(s string) string {
	return escapeRe.ReplaceAllStringFunc(s, func(match string) string {
		switch match {
		case "\t":
			return `\t`
		case "\n":
			return `\n`
		case "\f":
			return `\f`
		case "\r":
			return `\r`
		case "\\":
			return `\\`
		case "\x00":
			return `\x00`
		case " ":
			return `\ `
		case "  ": // two spaces: keep the first, escape the second
			return ` \ `
		}
		return match // this shouldn't happen
	})
}

// escapeSpaceRe matches tab, line feed, a leading space, a trailing space,
// and a pair of consecutive spaces (the final alternative is two spaces).
var escapeSpaceRe = regexp.MustCompile(`[\t\n]|^ | $|  `)

// EscapeSpace works like Escape, except it only escapes spaces, tabs and line
// feeds.
func EscapeSpace(s string) string {
	return escapeSpaceRe.ReplaceAllStringFunc(s, func(match string) string {
		switch match {
		case "\t":
			return `\t`
		case "\n":
			return `\n`
		case " ":
			return `\ `
		case "  ": // two spaces: keep the first, escape the second
			return ` \ `
		}
		return match // this shouldn't happen
	})
}

// escapeNonspaceRe matches form feed, carriage return, backslash and U+0000.
var escapeNonspaceRe = regexp.MustCompile(`[\f\r\\\x00]`)

// EscapeNonspace works like Escape, except it does not escape spaces, tabs and
// line feeds.
func EscapeNonspace(s string) string {
	return escapeNonspaceRe.ReplaceAllStringFunc(s, func(match string) string {
		switch match {
		case "\f":
			return `\f`
		case "\r":
			return `\r`
		case "\\":
			return `\\`
		case "\x00":
			return `\x00`
		}
		return match // this shouldn't happen
	})
}

// unescapeRe matches one complete escape sequence: a backslash followed by a
// single-character escape, or by x, u or U with exactly 2, 4 or 8 hex digits.
var unescapeRe = regexp.MustCompile(`\\(?:[btnvfr \\]|x[0-9a-fA-F]{2}|u[0-9a-fA-F]{4}|U[0-9a-fA-F]{8})`)

// Unescape resolves escape sequences in simple text.
//
// Numeric escapes (\xXX, \uXXXX, \UXXXXXXXX) are replaced by the UTF-8
// encoding of that code point; values that are not valid code points become
// U+FFFD. Incomplete or unknown sequences are left untouched.
func Unescape(s string) string {
	return unescapeRe.ReplaceAllStringFunc(s, func(match string) string {
		switch match[1] {
		case 'b':
			return "\b"
		case 't':
			return "\t"
		case 'n':
			return "\n"
		case 'v':
			return "\v"
		case 'f':
			return "\f"
		case 'r':
			return "\r"
		case ' ':
			return " "
		case '\\':
			return "\\"
		case 'x', 'u', 'U':
			// The pattern guarantees 2, 4 or 8 hex digits, so parsing into
			// 32 bits cannot fail and the error is safely ignored.
			n, _ := strconv.ParseUint(match[2:], 16, 32)
			// Convert through rune: a direct integer-to-string conversion is
			// rejected by go vet. Values above the int32 range wrap negative
			// and, like every other invalid code point, encode as U+FFFD.
			return string(rune(n))
		}
		return match // this shouldn't happen
	})
}

// splitRe matches one word: a run of runes that are neither whitespace nor
// backslash, where a backslash takes the following rune with it. The empty
// alternative matches at the very start of the input.
var splitRe = regexp.MustCompile(`((?:[^\t\n\f\r\\ ]|\\.?)+|^)`)

// SplitUnescape splits the string s by whitespace, then unescapes simple text
// escape sequences.
func SplitUnescape(s string) []string {
	ss := splitRe.FindAllString(s, -1)
	for i := range ss {
		ss[i] = Unescape(ss[i])
	}
	return ss
}

// JoinEscape escapes each argument using simple text escape sequences and then
// joins them with spaces.
+func JoinEscape(ss []string) string { + var l []string + for _, s := range ss { + if s != "" { + l = append(l, Escape(s)) + } + } + return strings.Join(l, " ") +} diff --git a/simpletext_test.go b/simpletext_test.go new file mode 100644 index 0000000..8fdf754 --- /dev/null +++ b/simpletext_test.go @@ -0,0 +1,180 @@ +package cnm + +import "testing" + +var simpleEscapes = map[string]string{ + "": ``, + "ContNet": `ContNet`, + "\t": `\t`, + "\n": `\n`, + "\f": `\f`, + "\r": `\r`, + " ": `\ `, + "\\": `\\`, + "\x00": `\x00`, + " ": `\ \ \ \ `, + " ": `\ \ \ \ `, + " ": `\ \ \ `, + " ": `\ \ \ `, + " ": `\ \ `, + " ": `\ \ `, + "\b\v\\\x00\xff\u00ff\\xff": "\b\v\\\\\\x00\xff\u00ff\\\\xff", +} + +func TestEscape(t *testing.T) { + for k, v := range simpleEscapes { + t.Run(k, func(t *testing.T) { + e := Escape(k) + if e != v { + t.Errorf("Escape(%q) -> %q, expected %q", k, e, v) + } + }) + } +} + +var nonspaceEscapes = map[string]string{ + "": ``, + "ContNet": `ContNet`, + "\t": "\t", + "\n": "\n", + "\f": `\f`, + "\r": `\r`, + " ": ` `, + "\\": `\\`, + "\x00": `\x00`, + " ": ` `, + " ": ` `, + " ": ` `, + " ": ` `, + " ": ` `, + " ": ` `, + "\b\v\\\x00\xff\u00ff\\xff": "\b\v\\\\\\x00\xff\u00ff\\\\xff", +} + +func TestEscapeNonspace(t *testing.T) { + for k, v := range nonspaceEscapes { + t.Run(k, func(t *testing.T) { + e := EscapeNonspace(k) + if e != v { + t.Errorf("EscapeNonspace(%q) -> %q, expected %q", k, e, v) + } + }) + } +} + +var spaceEscapes = map[string]string{ + "": ``, + "ContNet": `ContNet`, + "\t": `\t`, + "\n": `\n`, + "\f": "\f", + "\r": "\r", + " ": `\ `, + "\\": `\`, + "\x00": "\x00", + " ": `\ \ \ \ `, + " ": `\ \ \ \ `, + " ": `\ \ \ `, + " ": `\ \ \ `, + " ": `\ \ `, + " ": `\ \ `, + "\b\v\\\x00\xff\u00ff\\xff": "\b\v\\\x00\xff\u00ff\\xff", +} + +func TestEscapeSpace(t *testing.T) { + for k, v := range spaceEscapes { + t.Run(k, func(t *testing.T) { + e := EscapeSpace(k) + if e != v { + t.Errorf("EscapeSpace(%q) -> %q, expected %q", k, e, v) + } + }) + 
} +} + +var simpleUnescapes = map[string]string{ + ``: "", + `ContNet`: "ContNet", + `\b`: "\b", + `\t`: "\t", + `\n`: "\n", + `\v`: "\v", + `\f`: "\f", + `\r`: "\r", + `\ `: " ", + `\\`: "\\", + `\`: "\\", + `\x00`: "\x00", + `a\nb\ c\rd\be\\f`: "a\nb c\rd\be\\f", + `\n\n\n`: "\n\n\n", + `\x00\xff\n\x123`: "\x00\u00ff\n\x123", + " \b\\b\t\n\v\f\r\\x00\x00\\\\xff": " \b\b\t\n\v\f\r\x00\x00\\xff", + `\xAA\xAa\xaA\xaa`: "\u00aa\u00aa\u00aa\u00aa", + `\x00\xfg`: "\x00\\xfg", + `\\\\\\`: "\\\\\\", + "\b5Ὂg̀9!\\n℃ᾭG": "\b5Ὂg̀9!\n℃ᾭG", + "\xff\\x00\xee\xaa\xee": "\xff\x00\xee\xaa\xee", + "\\x00\x10\\ \x30\x40": "\x00\x10\x20\x30\x40", + "\x10\x50\x90\xe0": "\x10\x50\x90\xe0", + `Hello,\ 世界`: "Hello, 世界", + "\xed\x9f\xbf": "\xed\x9f\xbf", + "\xee\x80\x80": "\xee\x80\x80", + "\xef\xbf\xbd": "\xef\xbf\xbd", + "\x80\x80\x80\x80": "\x80\x80\x80\x80", + `\ \ \ `: " ", + `\uffff\u0000\u0123\ufedc\ufffe`: "\uffff\u0000\u0123\ufedc\ufffe", + `\Uffff0000\U0003fedc\U0010ffff\U00110000`: "\ufffd\U0003fedc\U0010ffff\ufffd", + `\x0x\u012x\U0123456x`: "\\x0x\\u012x\\U0123456x", + `\U0123456`: "\\U0123456", + `\u012`: "\\u012", + `\x0`: "\\x0", + `\x\u\U\a\z\0\-`: "\\x\\u\\U\\a\\z\\0\\-", +} + +func TestUnescape(t *testing.T) { + for k, v := range simpleUnescapes { + t.Run(k, func(t *testing.T) { + u := Unescape(k) + if u != v { + t.Errorf("Unescape(%q) -> %q, expected %q", k, u, v) + } + }) + } +} + +var simpleTexts = map[string]string{ + "foo": "foo", + "\n": "", + "\n\r \t\v\f": "\v", + " ": "", + `\ `: " ", + ` \ `: " ", + `\ `: " ", + `\ \ `: " ", + ` \`: "\\", + `\`: "\\", + ` \ `: " ", + ` `: "", + `\ \ `: " ", + ` \ `: " ", + " qwe asd ": "qwe asd", + "\\ qwe\nasd\n\nzxc\\n123\n": " qwe asd zxc\n123", + `\ \ \ \ \ `: " ", + ` \ \ \ `: " ", + ` \\ `: "\\", + `\ \\ `: " \\", + ` \\\ `: "\\ ", + ` \ \\\ `: " \\ ", + `\ \\ \ `: " \\ ", +} + +func TestParseSimpleText(t *testing.T) { + for k, v := range simpleTexts { + t.Run(k, func(t *testing.T) { + u := ParseSimpleText(k) + if u != 
// Token represents a parsed line in a CNM document.
//
// The concrete token types in this file implement Token through pointer
// receivers, so values are handled as pointers (for example *TokenBlock).
type Token interface {
	// Indent returns the indentation of the parsed line.
	Indent() int
	// Raw returns the original unparsed line.
	Raw() string
	// Line returns the line number in the document, starting from 1.
	Line() int
}

// Compile-time checks that every token type satisfies Token. The specialised
// tokens get their methods by embedding TokenLine, so these assertions break
// the build if the embedding or the receiver kind ever changes.
var (
	_ Token = (*TokenLine)(nil)
	_ Token = (*TokenEmptyLine)(nil)
	_ Token = (*TokenBlock)(nil)
	_ Token = (*TokenSimpleText)(nil)
	_ Token = (*TokenRawText)(nil)
)

// TokenLine represents an arbitrary CNM line.
//
// It is embedded by the more specific token types and supplies their
// implementation of the Token interface.
type TokenLine struct {
	// Indentation is the indentation of the parsed line.
	Indentation int
	// RawLine is the original unparsed line.
	RawLine string
	// LineNo is the line number in the document, starting from 1.
	LineNo int
}

// Indent returns the indentation of the parsed line.
func (t *TokenLine) Indent() int { return t.Indentation }

// Raw returns the original unparsed line.
func (t *TokenLine) Raw() string { return t.RawLine }

// Line returns the line number in the document, starting from 1.
func (t *TokenLine) Line() int { return t.LineNo }

// TokenEmptyLine represents an empty line.
//
// A line is empty as long as it contains up to as many tab characters as the
// line's indentation and nothing else.
type TokenEmptyLine struct {
	TokenLine
}

// TokenBlock represents a block header line.
type TokenBlock struct {
	TokenLine
	// Parent is the parent block.
	Parent *TokenBlock
	// Name is the block name.
	Name string
	// Args are the block arguments, split by whitespace and then parsed as
	// simple text.
	Args []string
}

// TokenSimpleText represents a line of simple text.
type TokenSimpleText struct {
	TokenLine
	// Text is the line contents parsed as simple text.
	Text string
}

// TokenRawText represents a non-empty line with unparsed contents.
type TokenRawText struct {
	TokenLine
	// Text is the raw contents of the line with the indentation removed.
	Text string
}
"Column 1", + }}, + }, + &TextBlock{ + Contents: TextPlainContents{[]string{ + "Column 2", + }}, + }, + }, + }}, + &RowBlock{ContentBlock{ + name: "row", + args: []string{}, + children: []Block{ + &TextBlock{ + Contents: TextPlainContents{[]string{ + "CNM", + }}, + }, + &TextBlock{ + Contents: TextPlainContents{[]string{ + "document", + "format", + }}, + }, + }, + }}, + &RowBlock{ContentBlock{ + name: "row", + args: []string{""}, + children: []Block{ + &SectionBlock{ContentBlock{ + name: "section", + args: []string{"", "", ""}, + }}, + &ListBlock{ContentBlock{ + name: "list", + args: nil, + children: []Block{ + &TextBlock{ + Contents: TextPlainContents{[]string{ + "ipsum", + }}, + }, + &ListBlock{ContentBlock{ + name: "list", + args: []string{"ordered"}, + children: []Block{ + &ListBlock{ContentBlock{ + name: "list", + args: []string{"unordered"}, + children: []Block{ + &TextBlock{ + Contents: TextPlainContents{[]string{ + "dolor", + "sit amet", + }}, + }, + }, + }}, + }, + }}, + }, + }}, + }, + }}, + }}, + }, + }}, + }, + }}, + &EmbedBlock{ + Type: "text/cnm", + URL: "cnp://example.com/", + Description: "thing whatever", + }, + }, + }, + }, +} + +func TestWrite(t *testing.T) { + for k, v := range writeTests { + t.Run(k, func(t *testing.T) { + var buf bytes.Buffer + err := v.Write(&buf) + if err != nil { + t.Fatalf("Write error: %v", err) + } + w := buf.String() + t.Log("====================") + t.Log("expected:\n" + k) + t.Log("--------------------") + t.Log(" got:\n" + w) + t.Log("====================") + if k != w { + t.Fatal("Write: output did not match expected document") + } + }) + } +} -- cgit