summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author clsr <clsr@clsr.net> 2017-08-18 13:45:49 +0200
committer clsr <clsr@clsr.net> 2017-08-18 13:45:49 +0200
commit 26248678aafc2f8e277d4bdafc116f2b349b02c5 (patch)
tree 15f82488edb8c05aae756443284731875f36737c
download cnm-go-26248678aafc2f8e277d4bdafc116f2b349b02c5.zip
cnm-go-26248678aafc2f8e277d4bdafc116f2b349b02c5.tar.gz
Initial commit (v0.1.0)
-rw-r--r-- cnmfmt/cnmfmt.go 525
-rw-r--r-- cnmfmt/cnmfmt_test.go 457
-rw-r--r-- content.go 610
-rw-r--r-- document.go 278
-rw-r--r-- parse.go 189
-rw-r--r-- parse_test.go 701
-rw-r--r-- simpletext.go 196
-rw-r--r-- simpletext_test.go 180
-rw-r--r-- token.go 58
-rw-r--r-- write_test.go 218
10 files changed, 3412 insertions, 0 deletions
diff --git a/cnmfmt/cnmfmt.go b/cnmfmt/cnmfmt.go
new file mode 100644
index 0000000..cb8dc64
--- /dev/null
+++ b/cnmfmt/cnmfmt.go
@@ -0,0 +1,525 @@
+// Package cnmfmt provides parsing and composition for CNMfmt formatting.
+package cnmfmt // import "contnet.org/lib/cnm-go/cnmfmt"
+
+import (
+ "bytes"
+ "io"
+ "strings"
+
+ "contnet.org/lib/cnm-go"
+)
+
+// init registers parseTextFmt with the cnm package as the text content parser
+// for the "fmt" format.
+func init() {
+ cnm.RegisterTextContentParser("fmt", parseTextFmt)
+}
+
+// Text represents a paragraph of CNMfmt text as a sequence of formatted spans.
+type Text struct {
+ // Spans are spans of formatted text.
+ Spans []Span
+}
+
+// ParseParagraph parses a single CNMfmt text paragraph s.
+//
+// Whitespace in s is first collapsed with cnm.CollapseWhitespace. The text is
+// then scanned rune by rune: a doubled tag character ("**", "//", "__", "``",
+// "@@") ends the current span and toggles the matching format, and a
+// backslash keeps the following rune from being treated as a tag. Escape
+// sequences in the span text are resolved after all spans were collected.
+func ParseParagraph(s string) Text {
+ s = cnm.CollapseWhitespace(s)
+
+ t := Text{}
+ var buf bytes.Buffer // text (or link URL) being collected, still escaped
+ format := Format{}
+ last := rune(-1) // pending backslash or single tag character; -1 if none
+ url := false // true while inside a link opened by "@@"
+
+ for _, r := range s {
+ if url && format.Link == "" { // need URL for link
+ if handleURL(r, &last, &format, &buf) {
+ continue
+ }
+ }
+
+ switch r {
+ case '*', '/', '_', '`', '@':
+ handleTag(r, &last, &t, &format, &buf, &url)
+
+ case '\\':
+ if last == '\\' {
+ // keep the escaped backslash escaped; it is resolved by Unescape later
+ buf.WriteString("\\\\")
+ last = -1
+ } else {
+ if last >= 0 {
+ buf.WriteRune(last)
+ }
+ last = '\\'
+ }
+
+ default:
+ // ordinary rune: a pending backslash or single tag character is
+ // written out unchanged in front of it
+ if last >= 0 {
+ buf.WriteRune(last)
+ }
+ buf.WriteRune(r)
+ last = -1
+ }
+ }
+
+ if url && format.Link == "" {
+ // the paragraph ended while the URL was still being read
+ if last >= 0 {
+ buf.WriteRune(last)
+ }
+ format.Link = Unescape(buf.String())
+ buf.Reset()
+ } else if last >= 0 {
+ buf.WriteRune(last)
+ }
+ last = -1
+ // with last == -1 and r == -1 handleTag takes its "doubled tag" branch,
+ // which appends the remaining buffer as the final span
+ handleTag(-1, &last, &t, &format, &buf, &url)
+
+ t.trimUnescape()
+
+ return t
+}
+
+// trimUnescape drops every span that has neither text nor a link and resolves
+// the escape sequences in the text of the remaining spans.
+//
+// Spans are filtered on their still-escaped text, so the check happens before
+// Unescape is applied. A single pass is enough: filtering the already filtered
+// list a second time in reverse and reversing it again yields the same list.
+func (t *Text) trimUnescape() {
+	var spans []Span
+	for _, span := range t.Spans {
+		if span.Text == "" && span.Format.Link == "" {
+			continue
+		}
+		span.Text = Unescape(span.Text)
+		spans = append(spans, span)
+	}
+	t.Spans = spans
+}
+
+// appendSpan adds a span with the given format and text to t. A span that has
+// neither text nor a link is not added.
+func (t *Text) appendSpan(format Format, txt string) {
+	if txt == "" && format.Link == "" {
+		return
+	}
+	t.Spans = append(t.Spans, Span{format, txt})
+}
+
+// handleURL consumes the rune r while the URL part of a link is being read,
+// that is after an opening "@@" and before format.Link has been set.
+//
+// *last holds a pending '\\' or '@' (or -1 if there is none) and buf collects
+// the still-escaped URL. The URL ends at the first space that follows it, in
+// which case the link text comes next, or at a closing "@@".
+//
+// It returns true if r was consumed as part of the URL. It returns false if r
+// is the second '@' of a closing "@@"; format.Link is set and the caller must
+// handle r as a tag so that the link is closed.
+func handleURL(r rune, last *rune, format *Format, buf *bytes.Buffer) bool {
+	if *last == '@' {
+		if r == '@' { // end without text
+			format.Link = Unescape(buf.String())
+			buf.Reset()
+			return false
+		}
+		// A single '@' does not close the link, so it belongs to the URL.
+		// Write it now to keep it in place; leaving it pending would move it
+		// behind the runes that follow and let a later '@' close the link.
+		buf.WriteByte('@')
+		*last = -1
+	}
+
+	switch {
+	case *last == '\\': // escaped rune; the escape is resolved by Unescape
+		buf.WriteByte('\\')
+		buf.WriteRune(r)
+		*last = -1
+	case r == '\\' || r == '@': // may start an escape or a closing "@@"
+		*last = r
+	case r != ' ': // url
+		buf.WriteRune(r)
+	case buf.Len() > 0: // space, then text
+		format.Link = Unescape(buf.String())
+		buf.Reset()
+	} // otherwise: prefix space
+	return true
+}
+
+// handleTag processes the tag character r. ParseParagraph also calls it with
+// r == -1 at the end of the paragraph to flush the remaining text.
+//
+// A tag only takes effect when it is doubled: the first occurrence is kept in
+// *last, and a second identical rune appends the buffered text to txt as a
+// span and toggles the matching flag in format. "@@" clears format.Link and
+// toggles *url. If *last is a backslash, r is written to buf as text instead.
+func handleTag(r rune, last *rune, txt *Text, format *Format, buf *bytes.Buffer, url *bool) {
+ if *last == '\\' {
+ // escaped tag character: only r is written, the backslash is dropped
+ buf.WriteRune(r)
+ *last = -1
+ } else if *last == r {
+ // doubled tag: close the current span, then switch the format
+ txt.appendSpan(*format, buf.String())
+ buf.Reset()
+ switch r {
+ case '*':
+ format.Bold = !format.Bold
+ case '/':
+ format.Italic = !format.Italic
+ case '_':
+ format.Underline = !format.Underline
+ case '`':
+ format.Monospace = !format.Monospace
+ case '@':
+ format.Link = ""
+ *url = !*url
+ }
+ *last = -1
+ } else {
+ // a different single tag character was pending: it is plain text
+ switch *last {
+ case '*', '/', '_', '`', '@':
+ buf.WriteRune(*last)
+ }
+ *last = r
+ }
+}
+
+// WriteIndent writes the formatted text indented by n tabs.
+//
+// The spans are escaped with EscapeSpans and written as a single line. Before
+// each span only the tags whose state differs from the previous span are
+// written, in the order computed by tagOrder.
+func (t Text) WriteIndent(w io.Writer, n int) error {
+ var state [5]byte // bold, italic, underline, monospace, link
+ si := 0 // number of entries of state that are in use
+ format := Format{}
+ spans := EscapeSpans(t.Spans)
+ var line []string
+ for _, span := range spans {
+ order := tagOrder(state[:si], format, span.Format)
+ for _, f := range order {
+ switch f {
+ case '*':
+ format.Bold = !format.Bold
+ line = append(line, "**")
+ case '/':
+ format.Italic = !format.Italic
+ line = append(line, "//")
+ case '_':
+ format.Underline = !format.Underline
+ line = append(line, "__")
+ case '`':
+ format.Monospace = !format.Monospace
+ line = append(line, "``")
+ case '@':
+ // close the link of the previous span, then open the new one
+ if format.Link != "" {
+ line = append(line, "@@")
+ }
+ if span.Format.Link != "" {
+ // the URL is separated from the link text by a space
+ pad := ""
+ if span.Text != "" {
+ pad = " "
+ }
+ // NOTE(review): the URL is escaped with cnm.Escape only; whether
+ // that covers '@' and spaces, which handleURL treats specially
+ // when reading a URL, is not visible here — confirm.
+ line = append(line, "@@", cnm.Escape(span.Format.Link), pad)
+ }
+ }
+ }
+ line = append(line, span.Text)
+ si = cleanupTags(state[:], order, span.Format)
+ format = span.Format
+ }
+ return writeIndent(w, strings.Join(line, ""), n)
+}
+
+// tagOrder returns the tags that have to be toggled to get from format old to
+// format new, in the order in which they are to be written.
+//
+// state lists the tags that are currently open. Changed tags found in state
+// come first, walking state from its end to its start; the remaining changed
+// tags follow in the fixed order '*', '/', '_', '`', '@'. Every tag is
+// returned at most once.
+func tagOrder(state []byte, old, new Format) []byte {
+	const tags = "*/_`@"
+
+	// changed[i] is true while tags[i] differs and has not been emitted yet.
+	changed := [len(tags)]bool{
+		old.Bold != new.Bold,
+		old.Italic != new.Italic,
+		old.Underline != new.Underline,
+		old.Monospace != new.Monospace,
+		old.Link != new.Link,
+	}
+
+	order := make([]byte, 0, len(tags))
+	emit := func(tag byte) {
+		for i := 0; i < len(tags); i++ {
+			if tags[i] == tag && changed[i] {
+				order = append(order, tag)
+				changed[i] = false
+			}
+		}
+	}
+
+	for i := len(state) - 1; i >= 0; i-- {
+		emit(state[i])
+	}
+	for i := 0; i < len(tags); i++ {
+		emit(tags[i])
+	}
+	return order
+}
+
+// cleanupTags updates the list of open tags in state after the tags in order
+// have been toggled and returns the number of entries that are in use.
+//
+// state is the whole backing storage of the list, order holds the tags that
+// were just written and format is the format that is now in effect. The new
+// list consists of the previously open tags that were not toggled, followed
+// by the toggled tags that are switched on in format. Entries beyond the
+// returned length are set to zero.
+//
+// Every tag is kept at most once, so the list never outgrows the five known
+// tags. Without that, leftover entries and repeated link changes let the list
+// grow past len(state), and indexing state panicked.
+func cleanupTags(state []byte, order []byte, format Format) int {
+	// active reports whether tag is switched on in format.
+	active := func(tag byte) bool {
+		switch tag {
+		case '*':
+			return format.Bold
+		case '/':
+			return format.Italic
+		case '_':
+			return format.Underline
+		case '`':
+			return format.Monospace
+		case '@':
+			return format.Link != ""
+		}
+		return false
+	}
+
+	var open [5]byte
+	n := 0
+	push := func(tag byte) {
+		if n < len(open) && active(tag) && bytes.IndexByte(open[:n], tag) < 0 {
+			open[n] = tag
+			n++
+		}
+	}
+
+	for _, tag := range state {
+		// A toggled tag was closed. If it is on again (a changed link), it
+		// was reopened and is added from order below, after the others.
+		if bytes.IndexByte(order, tag) < 0 {
+			push(tag)
+		}
+	}
+	for _, tag := range order {
+		push(tag)
+	}
+
+	si := copy(state, open[:n])
+	for i := si; i < len(state); i++ {
+		state[i] = 0 // do not leave stale tags behind for the next call
+	}
+	return si
+}
+
+// Span represents a span of text with a single format applied to all of it.
+type Span struct {
+ // Format is the format of the text.
+ Format Format
+
+ // Text is the text content of the span.
+ Text string
+}
+
+// Format represents a state of CNMfmt formatting. The zero value is
+// unformatted text without a link.
+type Format struct {
+ // Bold text.
+ Bold bool
+
+ // Italic text.
+ Italic bool
+
+ // Underlined text.
+ Underline bool
+
+ // Monospaced text.
+ Monospace bool
+
+ // Hyperlink URL (if non-empty).
+ Link string
+}
+
+// Escape escapes CNMfmt and CNM text special characters.
+//
+// s is escaped with cnm.Escape first; the CNMfmt format toggle characters in
+// the result are then escaped with EscapeFmt.
+func Escape(s string) string {
+ return EscapeFmt(cnm.Escape(s))
+}
+
+// EscapeSpans escapes CNMfmt and CNM text within spans and returns the result
+// as a new slice; spans itself is not modified.
+//
+// This function will not needlessly escape spaces at the start or end of a
+// span if the sibling span contains nonspaces.
+func EscapeSpans(spans []Span) []Span {
+	escaped := make([]Span, len(spans))
+	for i, span := range spans {
+		// If the neighbouring span's text touches this span with a non-space
+		// byte, a sentinel "x" is put on that side before escaping and cut
+		// off again afterwards, so that Escape does not see a space there as
+		// the first or last character of the text.
+		prefix, suffix := "", ""
+		if i > 0 {
+			if prev := spans[i-1].Text; prev != "" && prev[len(prev)-1] != ' ' {
+				prefix = "x"
+			}
+		}
+		if i+1 < len(spans) {
+			if next := spans[i+1].Text; next != "" && next[0] != ' ' {
+				suffix = "x"
+			}
+		}
+
+		text := Escape(prefix + span.Text + suffix)
+		span.Text = text[len(prefix) : len(text)-len(suffix)]
+		escaped[i] = span
+	}
+	return escaped
+}
+
+// escapeReplacer prefixes each CNMfmt format toggle character with a
+// backslash.
+var escapeReplacer = strings.NewReplacer(
+ `*`, `\*`,
+ `/`, `\/`,
+ `_`, `\_`,
+ "`", "\\`",
+ `@`, `\@`,
+)
+
+// EscapeFmt escapes only CNMfmt format toggle characters.
+//
+// Backslashes in s are left as they are.
+func EscapeFmt(s string) string {
+ return escapeReplacer.Replace(s)
+}
+
+// Unescape resolves CNM text and CNMfmt escape sequences in s.
+//
+// The CNMfmt sequences are resolved first with UnescapeFmt; the result is
+// then passed to cnm.Unescape.
+func Unescape(s string) string {
+ return cnm.Unescape(UnescapeFmt(s))
+}
+
+// unescapeReplacer removes the backslash in front of CNMfmt format toggle
+// characters.
+//
+// An escaped backslash is replaced by itself: this consumes both bytes, so
+// the second backslash is never taken as the start of one of the other
+// sequences.
+var unescapeReplacer = strings.NewReplacer(
+ `\\`, `\\`,
+ `\*`, `*`,
+ `\/`, `/`,
+ `\_`, `_`,
+ "\\`", "`",
+ `\@`, `@`,
+)
+
+// UnescapeFmt resolves only CNMfmt escape sequences in s.
+//
+// Escaped backslashes are kept escaped.
+func UnescapeFmt(s string) string {
+ return unescapeReplacer.Replace(s)
+}
+
+// TextFmtContents represents CNM `text fmt` contents.
+type TextFmtContents struct {
+ // Paragraphs are the CNMfmt paragraphs of the block, in order.
+ Paragraphs []Text
+}
+
+// NewTextFmtBlock creates a new `text fmt` block containing provided CNMfmt
+// paragraphs.
+//
+// The paragraphs slice is stored as is, without copying.
+func NewTextFmtBlock(paragraphs []Text) *cnm.TextBlock {
+ return cnm.NewTextBlock("fmt", TextFmtContents{paragraphs})
+}
+
+// WriteIndent writes the formatted text contents indented by n tabs.
+//
+// Each paragraph is written with Text.WriteIndent; consecutive paragraphs are
+// separated by an empty line.
+func (tf TextFmtContents) WriteIndent(w io.Writer, n int) error {
+	for i := range tf.Paragraphs {
+		if i > 0 {
+			// paragraph separator
+			if err := writeIndent(w, "", 0); err != nil {
+				return err
+			}
+		}
+		err := tf.Paragraphs[i].WriteIndent(w, n)
+		if err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+// Parse parses paragraphs of CNMfmt text.
+//
+// The input is split into lines. Consecutive lines are joined with a space
+// and parsed as one paragraph with ParseParagraph. An empty line ends the
+// paragraph collected so far, if there is one; a line that consists only of
+// whitespace always ends it, even if no line has been collected yet.
+func Parse(paragraphs string) []Text {
+	var (
+		txt     []Text
+		pending []string
+	)
+	flush := func() {
+		txt = append(txt, ParseParagraph(strings.Join(pending, " ")))
+		pending = nil
+	}
+
+	for _, line := range strings.Split(paragraphs, "\n") {
+		switch {
+		case line == "":
+			if len(pending) > 0 {
+				flush()
+			}
+		case strings.Trim(line, "\n\r\t\f ") == "":
+			flush()
+		default:
+			pending = append(pending, line)
+		}
+	}
+	if len(pending) > 0 {
+		flush()
+	}
+
+	return txt
+}
+
+// writeIndent writes s to w as one line, preceded by depth tabs and followed
+// by a newline.
+//
+// An empty s is written as a bare newline without indentation. A depth of
+// zero or less writes no indentation; a negative depth no longer panics. The
+// line is passed to w in a single Write call.
+func writeIndent(w io.Writer, s string, depth int) error {
+	const tabs = "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t"
+
+	if s == "" {
+		_, err := w.Write([]byte{'\n'})
+		return err
+	}
+
+	var ind string
+	switch {
+	case depth <= 0:
+		// no indentation
+	case depth <= len(tabs):
+		ind = tabs[:depth] // common case: no allocation for the indent
+	default:
+		ind = strings.Repeat("\t", depth)
+	}
+	_, err := w.Write([]byte(ind + s + "\n"))
+	return err
+}
+
+// parseTextFmt is the text content parser registered for the "fmt" format.
+//
+// It reads raw text lines from p until p is on a non-empty line whose indent
+// is not greater than that of block, or until p.Next returns an error.
+// Consecutive lines are joined with a space and parsed as one paragraph with
+// ParseParagraph. A paragraph ends at an empty line or at a line that
+// consists only of whitespace.
+//
+// The error of the last p.Next call is returned together with the contents.
+//
+// NOTE(review): a whitespace-only raw line ends the paragraph even if no line
+// has been collected, which appends an empty Text — confirm this is intended.
+func parseTextFmt(p *cnm.Parser, block *cnm.TokenBlock) (cnm.TextContents, error) {
+ txt := TextFmtContents{}
+ var paragraph []string
+ var err error
+ for err == nil {
+ if !p.Empty() && p.Indent() <= block.Indent() {
+ break
+ }
+
+ token := p.RawText()
+ end := false
+ if text, ok := token.(*cnm.TokenRawText); ok {
+ if strings.Trim(text.Text, "\n\r\t\f ") == "" {
+ end = true
+ } else {
+ paragraph = append(paragraph, text.Text)
+ }
+ } else if _, ok := token.(*cnm.TokenEmptyLine); ok && len(paragraph) > 0 {
+ end = true
+ }
+ if end {
+ txt.Paragraphs = append(txt.Paragraphs, ParseParagraph(strings.Join(paragraph, " ")))
+ paragraph = nil
+ }
+ err = p.Next()
+ }
+ // flush the paragraph that was still open when the block ended
+ if len(paragraph) > 0 {
+ txt.Paragraphs = append(txt.Paragraphs, ParseParagraph(strings.Join(paragraph, " ")))
+ }
+ return txt, err
+}
diff --git a/cnmfmt/cnmfmt_test.go b/cnmfmt/cnmfmt_test.go
new file mode 100644
index 0000000..89a40a9
--- /dev/null
+++ b/cnmfmt/cnmfmt_test.go
@@ -0,0 +1,457 @@
+package cnmfmt
+
+import (
+ "bytes"
+ "io"
+ "strings"
+ "testing"
+
+ "contnet.org/lib/cnm-go"
+)
+
+// parseTests maps a CNMfmt paragraph to the Text that parsing it is expected
+// to produce. It is used by TestParseParagraph and TestParse.
+var parseTests = map[string]Text{
+ "\\nfoo\nbar\\": Text{[]Span{
+ Span{Format{}, "\nfoo bar\\"},
+ }},
+ "**foo": Text{[]Span{
+ Span{Format{Bold: true}, "foo"},
+ }},
+ "//foo": Text{[]Span{
+ Span{Format{Italic: true}, "foo"},
+ }},
+ "__foo": Text{[]Span{
+ Span{Format{Underline: true}, "foo"},
+ }},
+ "``foo": Text{[]Span{
+ Span{Format{Monospace: true}, "foo"},
+ }},
+ "foo*bar": Text{[]Span{
+ Span{Format{}, "foo*bar"},
+ }},
+ "foo*": Text{[]Span{
+ Span{Format{}, "foo*"},
+ }},
+ "foo**": Text{[]Span{
+ Span{Format{}, "foo"},
+ }},
+ "foo***": Text{[]Span{
+ Span{Format{}, "foo"},
+ Span{Format{Bold: true}, "*"},
+ }},
+ "foo****": Text{[]Span{
+ Span{Format{}, "foo"},
+ }},
+ "*foo": Text{[]Span{
+ Span{Format{}, "*foo"},
+ }},
+ "****foo": Text{[]Span{
+ Span{Format{}, "foo"},
+ }},
+ "******foo": Text{[]Span{
+ Span{Format{Bold: true}, "foo"},
+ }},
+ "foo ** bar": Text{[]Span{
+ Span{Format{}, "foo "},
+ Span{Format{Bold: true}, " bar"},
+ }},
+ "foo** bar": Text{[]Span{
+ Span{Format{}, "foo"},
+ Span{Format{Bold: true}, " bar"},
+ }},
+ "foo **bar": Text{[]Span{
+ Span{Format{}, "foo "},
+ Span{Format{Bold: true}, "bar"},
+ }},
+ "foo ** bar ** baz": Text{[]Span{
+ Span{Format{}, "foo "},
+ Span{Format{Bold: true}, " bar "},
+ Span{Format{}, " baz"},
+ }},
+ "foo ** bar** baz": Text{[]Span{
+ Span{Format{}, "foo "},
+ Span{Format{Bold: true}, " bar"},
+ Span{Format{}, " baz"},
+ }},
+ "**__**foo": Text{[]Span{
+ Span{Format{Underline: true}, "foo"},
+ }},
+ "***": Text{[]Span{
+ Span{Format{Bold: true}, "*"},
+ }},
+ "*\\**": Text{[]Span{
+ Span{Format{}, "***"},
+ }},
+ "\\*": Text{[]Span{
+ Span{Format{}, "*"},
+ }},
+ "\\*\\*": Text{[]Span{
+ Span{Format{}, "**"},
+ }},
+ "\\**": Text{[]Span{
+ Span{Format{}, "**"},
+ }},
+ "*\\*": Text{[]Span{
+ Span{Format{}, "**"},
+ }},
+ "\\": Text{[]Span{
+ Span{Format{}, "\\"},
+ }},
+ "\\\\": Text{[]Span{
+ Span{Format{}, "\\"},
+ }},
+ " ** // `` ": Text{[]Span{
+ Span{Format{Bold: true}, " "},
+ Span{Format{Bold: true, Italic: true}, " "},
+ }},
+ "**": Text{[]Span{}},
+ "**``__//foo": Text{[]Span{
+ Span{Format{Bold: true, Monospace: true, Underline: true, Italic: true}, "foo"},
+ }},
+ "**foo//bar**baz": Text{[]Span{
+ Span{Format{Bold: true}, "foo"},
+ Span{Format{Bold: true, Italic: true}, "bar"},
+ Span{Format{Italic: true}, "baz"},
+ }},
+ "@@foo": Text{[]Span{
+ Span{Format{Link: "foo"}, ""},
+ }},
+ "@@foo@@": Text{[]Span{
+ Span{Format{Link: "foo"}, ""},
+ }},
+ "@@foo bar@@": Text{[]Span{
+ Span{Format{Link: "foo"}, "bar"},
+ }},
+ "@@ foo": Text{[]Span{
+ Span{Format{Link: "foo"}, ""},
+ }},
+ "@@foo ": Text{[]Span{
+ Span{Format{Link: "foo"}, ""},
+ }},
+ "@@foo\\": Text{[]Span{
+ Span{Format{Link: "foo\\"}, ""},
+ }},
+ "@@foo \\": Text{[]Span{
+ Span{Format{Link: "foo"}, "\\"},
+ }},
+ "@@foo \\\\": Text{[]Span{
+ Span{Format{Link: "foo"}, "\\"},
+ }},
+ "@@foo@": Text{[]Span{
+ Span{Format{Link: "foo@"}, ""},
+ }},
+ "@@foo\\@@": Text{[]Span{
+ Span{Format{Link: "foo@@"}, ""},
+ }},
+ "@@f\\\\o\\o\\n @": Text{[]Span{
+ Span{Format{Link: "f\\o\\o\n"}, "@"},
+ }},
+ "@@http://example.com foo **bar @@baz**": Text{[]Span{
+ Span{Format{Link: "http://example.com"}, "foo "},
+ Span{Format{Bold: true, Link: "http://example.com"}, "bar "},
+ Span{Format{Bold: true}, "baz"},
+ }},
+ "//@@http://example.com foo //bar @@": Text{[]Span{
+ Span{Format{Italic: true, Link: "http://example.com"}, "foo "},
+ Span{Format{Link: "http://example.com"}, "bar "},
+ }},
+ "__\\ asd \\ zxc\\ ": Text{[]Span{
+ Span{Format{Underline: true, Monospace: false}, " asd zxc "},
+ }},
+ "@@/ test/@@": Text{[]Span{
+ Span{Format{Link: "/"}, "test/"},
+ }},
+ "@@/ /test@@": Text{[]Span{
+ Span{Format{Link: "/"}, "/test"},
+ }},
+ "/": Text{[]Span{
+ Span{Format{}, "/"},
+ }},
+ "test/**": Text{[]Span{
+ Span{Format{}, "test/"},
+ }},
+ "//test/": Text{[]Span{
+ Span{Format{Italic: true}, "test/"},
+ }},
+ "/**test": Text{[]Span{
+ Span{Format{}, "/"},
+ Span{Format{Bold: true}, "test"},
+ }},
+}
+
+// TestParseParagraph checks ParseParagraph against every entry of parseTests,
+// one subtest per input.
+func TestParseParagraph(t *testing.T) {
+	for input, want := range parseTests {
+		t.Run(input, func(t *testing.T) {
+			got := ParseParagraph(input)
+			if textEqual(got, want) {
+				return
+			}
+			t.Errorf("ParseParagraph(%q):\nexpected: %#v\n got: %#v", input, want, got)
+		})
+	}
+}
+
+// TestParse checks that Parse turns every input of parseTests into exactly
+// one paragraph that equals the expected Text.
+func TestParse(t *testing.T) {
+	for input, want := range parseTests {
+		t.Run(input, func(t *testing.T) {
+			got := Parse(input)
+			if len(got) == 1 && textEqual(got[0], want) {
+				return
+			}
+			t.Errorf("Parse(%q):\nexpected: %#v\n got: %#v", input, []Text{want}, got)
+		})
+	}
+}
+
+// textEqual reports whether a and b consist of the same spans in the same
+// order. A nil and an empty span list are considered equal.
+func textEqual(a, b Text) bool {
+	if len(a.Spans) != len(b.Spans) {
+		return false
+	}
+	for i, span := range a.Spans {
+		if span != b.Spans[i] {
+			return false
+		}
+	}
+	return true
+}
+
+// escapeTests maps an input string to the output expected from Escape.
+var escapeTests = map[string]string{
+ "\n\r\t\v\x00": "\\n\\r\\t\v\\x00",
+ "@@!!##__//__``**": "\\@\\@!!##\\_\\_\\/\\/\\_\\_\\`\\`\\*\\*",
+ `foo\@\@bar`: `foo\\\@\\\@bar`,
+}
+
+// TestEscape checks Escape against every entry of escapeTests, one subtest
+// per input.
+func TestEscape(t *testing.T) {
+	for input, want := range escapeTests {
+		t.Run(input, func(t *testing.T) {
+			got := Escape(input)
+			if got == want {
+				return
+			}
+			t.Errorf("Escape(%q): expected %q, got %q", input, want, got)
+		})
+	}
+}
+
+// parseTextTests maps the content of a `text fmt` block to the paragraphs
+// that parseTextFmt and Parse are expected to produce for it.
+var parseTextTests = map[string]TextFmtContents{
+ "foo ** bar\nbaz\n\n\nquux ** ": TextFmtContents{[]Text{
+ Text{[]Span{
+ Span{Format{}, "foo "},
+ Span{Format{Bold: true}, " bar baz"},
+ }},
+ Text{[]Span{
+ Span{Format{}, "quux "},
+ }},
+ }},
+
+ "\n": TextFmtContents{},
+
+ "foo": TextFmtContents{[]Text{
+ Text{[]Span{
+ Span{Format{}, "foo"},
+ }},
+ }},
+
+ "\n\n": TextFmtContents{},
+
+ "foo\n\t\t\t\t\nbar": TextFmtContents{[]Text{
+ Text{[]Span{Span{Format{}, "foo"}}},
+ Text{[]Span{Span{Format{}, "bar"}}},
+ }},
+
+ "foo\n\t\t \f\r\t\nbar": TextFmtContents{[]Text{
+ Text{[]Span{Span{Format{}, "foo"}}},
+ Text{[]Span{Span{Format{}, "bar"}}},
+ }},
+
+ `foo**bar\*\*baz\*\*quux**qweasd`: TextFmtContents{[]Text{Text{[]Span{
+ Span{Format{}, "foo"},
+ Span{Format{Bold: true}, "bar**baz**quux"},
+ Span{Format{}, "qweasd"},
+ }}}},
+}
+
+// TestParseTextFmt runs parseTextFmt on every input of parseTextTests through
+// a cnm.Parser, with cnm.TopLevel as the enclosing block, and compares the
+// resulting paragraphs with the expected ones. The same input is also parsed
+// with Parse, which has to give the same paragraphs.
+func TestParseTextFmt(t *testing.T) {
+ for k, v := range parseTextTests {
+ t.Run(k, func(t *testing.T) {
+ parser := cnm.NewParser(strings.NewReader(k))
+ err := parser.Next()
+ // io.EOF is accepted here and below: it is not treated as a failure
+ if err != nil && err != io.EOF {
+ t.Fatalf("error parsing %q: %v", k, err)
+ }
+ content, err := parseTextFmt(parser, cnm.TopLevel)
+ if err != nil && err != io.EOF {
+ t.Fatalf("error parsing %q: %v", k, err)
+ }
+ tf, ok := content.(TextFmtContents)
+ if !ok {
+ t.Fatalf("%q: expected type %T, got %T", k, v, content)
+ }
+ if !paragraphsEqual(v.Paragraphs, tf.Paragraphs) {
+ t.Fatalf("%q:\nexpected: %#v\n got: %#v", k, v, tf)
+ }
+ txts := Parse(k)
+ if !paragraphsEqual(txts, v.Paragraphs) {
+ t.Fatalf("%q:\nexpected: %#v\n got: %#v", k, v.Paragraphs, txts)
+ }
+ })
+ }
+}
+
+// paragraphsEqual reports whether a and b hold the same number of paragraphs
+// and every pair of paragraphs is equal according to textEqual.
+func paragraphsEqual(a, b []Text) bool {
+	if len(a) != len(b) {
+		return false
+	}
+	for i, paragraph := range a {
+		if !textEqual(paragraph, b[i]) {
+			return false
+		}
+	}
+	return true
+}
+
+// writeTests maps the expected written form of `text fmt` contents to the
+// contents themselves. It is used by TestWriteTextFmt and by the round trip
+// tests TestWriteParseTextFmt and TestParseWriteTextFmt.
+var writeTests = map[string]TextFmtContents{
+ "": TextFmtContents{},
+
+ "foo\n": TextFmtContents{[]Text{
+ Text{[]Span{
+ Span{Format{}, "foo"},
+ }},
+ }},
+
+ "**foo\n": TextFmtContents{[]Text{
+ Text{[]Span{
+ Span{Format{Bold: true}, "foo"},
+ }},
+ }},
+
+ "foo **bar baz\n\nquux\n": TextFmtContents{[]Text{
+ Text{[]Span{
+ Span{Format{}, "foo "},
+ Span{Format{Bold: true}, "bar baz"},
+ }},
+ Text{[]Span{
+ Span{Format{}, "quux"},
+ }},
+ }},
+
+ "foo**bar``baz**quux\n\n" +
+ "\\ __qwe\\ __//\\ asd \\ //``zxc``**\\ \n\n" +
+ "//@@http://example.com exa//mple@@ @@href text@@// test\n": TextFmtContents{[]Text{
+ Text{[]Span{
+ Span{Format{}, "foo"},
+ Span{Format{Bold: true}, "bar"},
+ Span{Format{Bold: true, Monospace: true}, "baz"},
+ Span{Format{Monospace: true}, "quux"},
+ }},
+ Text{[]Span{
+ Span{Format{}, " "},
+ Span{Format{Underline: true}, "qwe "},
+ Span{Format{Italic: true}, " asd "},
+ Span{Format{Monospace: true}, "zxc"},
+ Span{Format{Bold: true}, " "},
+ }},
+ Text{[]Span{
+ Span{Format{Italic: true, Link: "http://example.com"}, "exa"},
+ Span{Format{Link: "http://example.com"}, "mple"},
+ Span{Format{}, " "},
+ Span{Format{Link: "href"}, "text"},
+ Span{Format{Italic: true}, " test"},
+ }},
+ }},
+
+ "foo**bar\\*\\*baz\\*\\*quux**qweasd\n": TextFmtContents{[]Text{Text{[]Span{
+ Span{Format{}, "foo"},
+ Span{Format{Bold: true}, "bar**baz**quux"},
+ Span{Format{}, "qweasd"},
+ }}}},
+}
+
+// TestWriteTextFmt writes the contents of every writeTests entry with
+// WriteIndent at depth 0 and compares the output with the expected text.
+func TestWriteTextFmt(t *testing.T) {
+	for want, contents := range writeTests {
+		t.Run(want, func(t *testing.T) {
+			var out bytes.Buffer
+			if err := contents.WriteIndent(&out, 0); err != nil {
+				t.Fatalf("WriteIndent error: %v", err)
+			}
+			got := out.String()
+			t.Log("expected:\n" + want)
+			t.Log(" got:\n" + got)
+			if want == got {
+				return
+			}
+			t.Fatalf("WriteIndent: output did not match expected document:\nexpected: %q\n got: %q", want, got)
+		})
+	}
+}
+
+// TestWriteParseTextFmt checks the write/parse round trip: the contents of
+// every writeTests entry are written with WriteIndent, the output is parsed
+// again with parseTextFmt and the result has to equal the original contents.
+func TestWriteParseTextFmt(t *testing.T) {
+ for k, v := range writeTests {
+ t.Run(k, func(t *testing.T) {
+ var buf bytes.Buffer
+ err := v.WriteIndent(&buf, 0)
+ if err != nil {
+ t.Fatalf("WriteIndent error: %v", err)
+ }
+ w := buf.String()
+
+ // empty contents write nothing; parse a single empty line instead
+ if w == "" {
+ w = "\n"
+ }
+ parser := cnm.NewParser(strings.NewReader(w))
+ err = parser.Next()
+ if err != nil && err != io.EOF {
+ t.Fatalf("error parsing %q: %v", w, err)
+ }
+ content, err := parseTextFmt(parser, cnm.TopLevel)
+ if err != nil && err != io.EOF {
+ t.Fatalf("error parsing %q: %v", w, err)
+ }
+ tf, ok := content.(TextFmtContents)
+ if !ok {
+ t.Fatalf("%q: expected type %T, got %T", w, v, content)
+ }
+ if !paragraphsEqual(v.Paragraphs, tf.Paragraphs) {
+ t.Fatalf("%q:\nexpected: %#v\n got: %#v", k, v, tf)
+ }
+ })
+ }
+}
+
+// TestParseWriteTextFmt checks the parse/write round trip: the text of every
+// writeTests entry is parsed with parseTextFmt, the result is compared with
+// the expected contents and then written again with WriteIndent, which has to
+// reproduce the original text.
+func TestParseWriteTextFmt(t *testing.T) {
+ for k, v := range writeTests {
+ t.Run(k, func(t *testing.T) {
+ // the empty document is parsed as a single empty line
+ s := k
+ if s == "" {
+ s = "\n"
+ }
+ parser := cnm.NewParser(strings.NewReader(s))
+ err := parser.Next()
+ if err != nil && err != io.EOF {
+ t.Fatalf("error parsing %q: %v", k, err)
+ }
+
+ content, err := parseTextFmt(parser, cnm.TopLevel)
+ if err != nil && err != io.EOF {
+ t.Fatalf("error parsing %q: %v", k, err)
+ }
+ tf, ok := content.(TextFmtContents)
+ if !ok {
+ t.Fatalf("%q: expected type %T, got %T", k, v, content)
+ }
+ if !paragraphsEqual(tf.Paragraphs, v.Paragraphs) {
+ t.Fatalf("%q: expected %#v, got %#v", k, v, tf)
+ }
+
+ var buf bytes.Buffer
+ err = tf.WriteIndent(&buf, 0)
+ if err != nil {
+ t.Fatalf("WriteIndent error: %v", err)
+ }
+
+ w := buf.String()
+ /*if w == "\n" {
+ k = ""
+ }*/
+
+ // the written text is compared with the original key k, not with s
+ if k != w {
+ t.Fatalf("%q:\nexpected: %#v\n got: %#v", k, k, w)
+ }
+ })
+ }
+}
diff --git a/content.go b/content.go
new file mode 100644
index 0000000..971e9e3
--- /dev/null
+++ b/content.go
@@ -0,0 +1,610 @@
+package cnm
+
+import (
+ "io"
+ "strings"
+)
+
+// init registers the built-in text content parsers: parseTextPlain for the
+// empty format and for "plain", and parseTextPre for "pre".
+func init() {
+ RegisterTextContentParser("", parseTextPlain)
+ RegisterTextContentParser("plain", parseTextPlain)
+ RegisterTextContentParser("pre", parseTextPre)
+}
+
+// Block represents an arbitrary CNM within the "content" top-level block.
+type Block interface {
+ // Name returns the name of the block.
+ Name() string
+
+ // Args returns the block arguments.
+ Args() []string
+
+ // WriteIndent writes the block to w, indented by n tabs.
+ WriteIndent(w io.Writer, n int) error
+}
+
+// ContentBlock represents a block that holds other content blocks.
+type ContentBlock struct {
+ name string // block name, written as the first word of the header
+ args []string // block arguments, written after the name
+ children []Block // child blocks in document order
+}
+
+// WriteIndent writes the block header and its children indented by n tabs.
+//
+// The header consists of the block name followed by the arguments. The
+// children are written one level deeper, with n+1 tabs.
+func (cb *ContentBlock) WriteIndent(w io.Writer, n int) error {
+ // NOTE(review): the name goes through Escape here and then, together with
+ // the arguments, through JoinEscape. If JoinEscape escapes its elements,
+ // the name is escaped twice — confirm against JoinEscape.
+ ss := []string{Escape(cb.name)}
+ ss = append(ss, cb.args...)
+ if err := writeIndent(w, JoinEscape(ss), n); err != nil {
+ return err
+ }
+ for _, ch := range cb.children {
+ if err := ch.WriteIndent(w, n+1); err != nil {
+ return err
+ }
+ }
+ return nil
+}
+
+// NewContentBlock creates a new ContentBlock with a name and argument.
+func NewContentBlock(name string, args ...string) *ContentBlock {
+ var a []string
+ for _, arg := range args {
+ if arg != "" {
+ a = append(a, arg)
+ }
+ }
+ return &ContentBlock{name: name, args: a}
+}
+
+// Name returns the block's name.
+func (cb *ContentBlock) Name() string {
+ return cb.name
+}
+
+// Args returns the block arguments.
+//
+// The returned slice is the block's own slice, not a copy.
+func (cb *ContentBlock) Args() []string {
+ return cb.args
+}
+
+// Children returns the block's child blocks.
+//
+// The returned slice is the block's own slice, not a copy.
+func (cb *ContentBlock) Children() []Block {
+ return cb.children
+}
+
+// AppendChild adds a new child block to the end of the list of children.
+func (cb *ContentBlock) AppendChild(block Block) {
+ cb.children = append(cb.children, block)
+}
+
+// parse reads the child blocks of block from p and appends them to cb.
+//
+// It stops when p is on a non-empty line whose indent is not greater than
+// that of block, or when an error occurs; that error is returned. Blocks with
+// a known name are handed to their parser. Any other block is passed to
+// parseUnknown and is not added as a child. Tokens that are not blocks are
+// skipped.
+func (cb *ContentBlock) parse(p *Parser, block *TokenBlock) (err error) {
+ for err == nil {
+ if !p.Empty() && p.Indent() <= block.Indent() {
+ break
+ }
+
+ token := p.Block()
+ if blk, ok := token.(*TokenBlock); ok {
+ var b Block
+ switch blk.Name {
+ case "section":
+ b, err = parseContentSection(p, blk)
+ case "text":
+ b, err = parseContentText(p, blk)
+ case "raw":
+ b, err = parseContentRaw(p, blk)
+ case "list":
+ b, err = parseContentList(p, blk)
+ case "table":
+ b, err = parseContentTable(p, blk)
+ case "embed":
+ b, err = parseContentEmbed(p, blk)
+ default:
+ err = parseUnknown(p, blk)
+ }
+ // the block is kept even if its parser also returned an error
+ if b != nil {
+ cb.AppendChild(b)
+ }
+ } else if err = p.Next(); err != nil {
+ break
+ }
+ }
+ return
+}
+
+// SectionBlock represents a "section" content block.
+//
+// The section title is stored in the arguments of the embedded ContentBlock.
+type SectionBlock struct {
+ ContentBlock
+}
+
+// NewSectionBlock creates a new SectionBlock with a title.
+//
+// The title is stored as a single block argument; an empty title results in a
+// block without arguments.
+func NewSectionBlock(title string) *SectionBlock {
+ return &SectionBlock{*NewContentBlock("section", title)}
+}
+
+// Title returns the section block's title, which is the block arguments
+// joined by single spaces.
+func (b *SectionBlock) Title() string { return strings.Join(b.args, " ") }
+
+// parseContentSection parses a "section" block. The arguments of block,
+// joined by spaces, become the title.
+//
+// p is advanced past the current line before the children are parsed. If that
+// fails, the section is returned without children together with the error.
+func parseContentSection(p *Parser, block *TokenBlock) (*SectionBlock, error) {
+ sec := NewSectionBlock(strings.Join(block.Args, " "))
+ if err := p.Next(); err != nil {
+ return sec, err
+ }
+ return sec, sec.parse(p, block)
+}
+
+// TextBlock represents a "text" content block.
+type TextBlock struct {
+ // Format is the text format (first word of the block argument).
+ Format string
+ // Contents are the text contents.
+ Contents TextContents
+}
+
+// NewTextBlock creates a new TextBlock containing arbitrary text contents.
+//
+// format is stored as is; it is not checked against the registered text
+// content parsers.
+func NewTextBlock(format string, contents TextContents) *TextBlock {
+ return &TextBlock{format, contents}
+}
+
+// Name returns the block name "text".
+func (t *TextBlock) Name() string { return "text" }
+
+// Args returns the block's arguments (format).
+//
+// It returns nil if the format is empty.
+func (t *TextBlock) Args() []string {
+ if t.Format == "" {
+ return nil
+ }
+ return []string{t.Format}
+}
+
+// WriteIndent writes the block header and its content indented by n tabs.
+//
+// The header is the block name, followed by the escaped format if there is
+// one. The contents are written one level deeper, with n+1 tabs.
+//
+// A block without contents is written as a header only. Contents is nil for
+// a block returned by parseContentText when the parser could not be advanced
+// past the header line.
+func (t *TextBlock) WriteIndent(w io.Writer, n int) error {
+	header := t.Name()
+	if t.Format != "" {
+		header += " " + Escape(t.Format)
+	}
+	if err := writeIndent(w, header, n); err != nil {
+		return err
+	}
+	if t.Contents == nil {
+		// calling WriteIndent on a nil interface value would panic
+		return nil
+	}
+	return t.Contents.WriteIndent(w, n+1)
+}
+
+// parseContentText parses a "text" block. The first argument of block, if
+// there is one, is the text format.
+//
+// p is advanced past the current line before the contents are parsed with
+// parseTextFormat. If advancing fails, the block is returned with nil
+// Contents together with the error.
+func parseContentText(p *Parser, block *TokenBlock) (*TextBlock, error) {
+ format := ""
+ if len(block.Args) >= 1 {
+ format = block.Args[0]
+ }
+ tb := NewTextBlock(format, nil)
+
+ if err := p.Next(); err != nil {
+ return tb, err
+ }
+
+ var err error
+ tb.Contents, err = parseTextFormat(p, block, tb.Format)
+
+ return tb, err
+}
+
+// parseTextFormat parses the contents of the text block block with the text
+// content parser registered for format. If no parser is registered for the
+// format, the contents are parsed as preformatted text.
+//
+// p has to be on the first line after the block header.
+func parseTextFormat(p *Parser, block *TokenBlock, format string) (TextContents, error) {
+	if parser := GetTextContentParser(format); parser != nil {
+		return parser(p, block)
+	}
+	// parseContentRaw is not suitable as the fallback: it begins by advancing
+	// p past the block header, which parseContentText has already done, so
+	// the first content line would be skipped. It also keeps the text
+	// escaped, while TextPreContents.WriteIndent escapes it again on output.
+	// parseTextPre starts at the current line and unescapes every line.
+	return parseTextPre(p, block)
+}
+
+// TextContents represents the textual contents of a text block.
+type TextContents interface {
+ // WriteIndent writes the contents to w, indented by n tabs.
+ WriteIndent(w io.Writer, n int) error
+}
+
+// TextContentParser parses text content in a text block.
+//
+// It is called with p on the first line after the header of block.
+type TextContentParser func(p *Parser, block *TokenBlock) (TextContents, error)
+
+// textContentParsers holds the registered text content parsers, keyed by
+// format name. It is a plain map without any locking.
+var textContentParsers = map[string]TextContentParser{}
+
+// GetTextContentParser retrieves a text content parser or nil if it doesn't
+// exist.
+func GetTextContentParser(name string) TextContentParser {
+ return textContentParsers[name]
+}
+
+// RegisterTextContentParser registers a new text content parser for a format.
+//
+// A parser that is already registered for name is replaced. Passing a nil
+// parser removes the registration for name.
+func RegisterTextContentParser(name string, parser TextContentParser) {
+ if parser == nil {
+ delete(textContentParsers, name)
+ } else {
+ textContentParsers[name] = parser
+ }
+}
+
+// TextPlainContents represents a list of simple text paragraphs.
+type TextPlainContents struct {
+ // Paragraphs is a list of simple text paragraphs, each written as a single
+ // line.
+ Paragraphs []string
+}
+
+// WriteIndent writes the plain text content indented by n tabs.
+//
+// Every paragraph is escaped and written as one line; consecutive paragraphs
+// are separated by an empty line.
+func (t TextPlainContents) WriteIndent(w io.Writer, n int) error {
+	for i := range t.Paragraphs {
+		if i > 0 {
+			// paragraph separator
+			if err := writeIndent(w, "", 0); err != nil {
+				return err
+			}
+		}
+		err := writeIndent(w, Escape(t.Paragraphs[i]), n)
+		if err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+// NewTextPlainBlock creates a new TextBlock containing TextPlainContents.
+//
+// The block has an empty format. The paragraphs slice is copied, so later
+// changes to it do not affect the block.
+func NewTextPlainBlock(paragraphs []string) *TextBlock {
+ par := make([]string, len(paragraphs))
+ copy(par, paragraphs)
+ return NewTextBlock("", TextPlainContents{par})
+}
+
+// parseTextPlain is the text content parser for the plain text format.
+//
+// It reads simple text lines from p until p is on a non-empty line whose
+// indent is not greater than that of block, or until p.Next returns an error.
+// Consecutive lines are joined with a space into one paragraph. A paragraph
+// ends at an empty line or at a line whose simple text is empty.
+//
+// The error of the last p.Next call is returned together with the contents.
+//
+// NOTE(review): a line with empty simple text ends the paragraph even if the
+// paragraph is still empty, which appends an empty paragraph — confirm this
+// is intended.
+func parseTextPlain(p *Parser, block *TokenBlock) (TextContents, error) {
+ txt := TextPlainContents{}
+ paragraph := ""
+ var err error
+ for err == nil {
+ if !p.Empty() && p.Indent() <= block.Indent() {
+ break
+ }
+
+ token := p.SimpleText()
+ end := false
+ if text, ok := token.(*TokenSimpleText); ok {
+ if text.Text == "" {
+ end = true
+ } else if paragraph == "" {
+ paragraph = text.Text
+ } else {
+ paragraph += " " + text.Text
+ }
+ } else if _, ok := token.(*TokenEmptyLine); ok && paragraph != "" {
+ end = true
+ }
+ if end {
+ txt.Paragraphs = append(txt.Paragraphs, paragraph)
+ paragraph = ""
+ }
+ err = p.Next()
+ }
+ // flush the paragraph that was still open when the block ended
+ if paragraph != "" {
+ txt.Paragraphs = append(txt.Paragraphs, paragraph)
+ }
+ return txt, err
+}
+
+// TextPreContents represents preformatted contents of a text block.
+type TextPreContents struct {
+ // Text is the preformatted content. Lines are separated by "\n".
+ Text string
+}
+
+// WriteIndent writes the preformatted text content indented by n tabs.
+//
+// The text is split at "\n" and every line is escaped with EscapeNonspace
+// before it is written.
+func (t TextPreContents) WriteIndent(w io.Writer, n int) error {
+	for _, line := range strings.Split(t.Text, "\n") {
+		err := writeIndent(w, EscapeNonspace(line), n)
+		if err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+// NewTextPreBlock creates a new TextBlock with the format "pre" containing
+// TextPreContents.
+func NewTextPreBlock(text string) *TextBlock {
+	// The format has to be "pre". A block with an empty format is written as
+	// a bare "text" header and read back by the plain text parser that init
+	// registers for the empty format, not by parseTextPre.
+	return NewTextBlock("pre", TextPreContents{text})
+}
+
+// parseTextPre is the text content parser for the "pre" format.
+//
+// It reads raw text lines from p until p is on a non-empty line whose indent
+// is not greater than that of block, or until p.Next returns an error. Every
+// line is unescaped and the lines are joined with "\n". Empty lines before
+// the first text line and after the last one are dropped; empty lines in
+// between are kept.
+//
+// The error of the last p.Next call is returned together with the contents.
+func parseTextPre(p *Parser, block *TokenBlock) (TextContents, error) {
+ var lines []string
+ var ls []string // empty lines seen since the last text line
+ var err error
+ for err == nil {
+ if !p.Empty() && p.Indent() <= block.Indent() {
+ break
+ }
+
+ token := p.RawText()
+ if text, ok := token.(*TokenRawText); ok {
+ // pending empty lines are only kept once more text follows
+ if len(ls) > 0 {
+ lines = append(lines, ls...)
+ ls = ls[:0]
+ }
+ lines = append(lines, Unescape(text.Text))
+ } else if _, ok := token.(*TokenEmptyLine); ok && len(lines) > 0 {
+ ls = append(ls, "")
+ }
+ err = p.Next()
+ }
+ return TextPreContents{strings.Join(lines, "\n")}, err
+}
+
+// RawBlock represents a "raw" content block.
+type RawBlock struct {
+ // Syntax is the syntax of the block contents (first word of block argument)
+ Syntax string
+
+ // Contents is the raw content. Lines are separated by "\n".
+ Contents string
+}
+
+// Name returns the block name "raw".
+func (r *RawBlock) Name() string { return "raw" }
+
+// Args returns the block's arguments (syntax).
+//
+// It returns nil if the syntax is empty.
+func (r *RawBlock) Args() []string {
+ if r.Syntax == "" {
+ return nil
+ }
+ return []string{r.Syntax}
+}
+
+// WriteIndent writes the block header indented by n tabs, followed by the raw
+// content indented by n+1 tabs.
+//
+// The header is the block name, followed by the escaped syntax if there is
+// one. The content is split at "\n" and written line by line without any
+// escaping; empty content writes no lines.
+func (r *RawBlock) WriteIndent(w io.Writer, n int) error {
+	header := r.Name()
+	if r.Syntax != "" {
+		header += " " + Escape(r.Syntax)
+	}
+	if err := writeIndent(w, header, n); err != nil {
+		return err
+	}
+	if r.Contents == "" {
+		return nil
+	}
+	for _, line := range strings.Split(r.Contents, "\n") {
+		err := writeIndent(w, line, n+1)
+		if err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+// parseContentRaw parses a "raw" block. The first argument of block, if there
+// is one, is the syntax.
+//
+// p is first advanced past the current line; if that fails, the block is
+// returned with empty contents together with the error. Raw text lines are
+// then read until p is on a non-empty line whose indent is not greater than
+// that of block, or until p.Next returns an error. The lines are joined with
+// "\n" without unescaping. Empty lines before the first text line and after
+// the last one are dropped; empty lines in between are kept.
+func parseContentRaw(p *Parser, block *TokenBlock) (*RawBlock, error) {
+ arg := ""
+ if len(block.Args) > 0 {
+ arg = block.Args[0]
+ }
+ rb := &RawBlock{arg, ""}
+
+ if err := p.Next(); err != nil {
+ return rb, err
+ }
+
+ var lines []string
+ var ls []string // empty lines seen since the last text line
+ var err error
+ for err == nil {
+ if !p.Empty() && p.Indent() <= block.Indent() {
+ break
+ }
+
+ token := p.RawText()
+ if text, ok := token.(*TokenRawText); ok {
+ // pending empty lines are only kept once more text follows
+ if len(ls) > 0 {
+ lines = append(lines, ls...)
+ ls = ls[:0]
+ }
+ lines = append(lines, text.Text)
+ } else if _, ok := token.(*TokenEmptyLine); ok && len(lines) > 0 {
+ ls = append(ls, "")
+ }
+ err = p.Next()
+ }
+ rb.Contents = strings.Join(lines, "\n")
+
+ return rb, err
+}
+
+// ListBlock represents a "list" content block.
+type ListBlock struct {
+ ContentBlock
+}
+
+// NewListBlock creates a new ListBlock.
+//
+// If the ordered parameter is true, the list is created in "ordered" mode.
+func NewListBlock(ordered bool) *ListBlock {
+ arg := ""
+ if ordered {
+ arg = "ordered"
+ }
+ return &ListBlock{*NewContentBlock("list", arg)}
+}
+
+// Ordered returns true if the list is in ordered mode (first word of the
+// block argument is "ordered").
+func (b *ListBlock) Ordered() bool {
+ return len(b.args) >= 1 && b.args[0] == "ordered"
+}
+
+func parseContentList(p *Parser, block *TokenBlock) (*ListBlock, error) {
+ list := NewListBlock(false)
+ list.args = block.Args
+ if err := p.Next(); err != nil {
+ return list, err
+ }
+ return list, list.parse(p, block)
+}
+
+// TableBlock represents a "table" content block.
+type TableBlock struct {
+ rows []Block
+}
+
+// NewTableBlock creates a new TableBlock.
+func NewTableBlock() *TableBlock {
+ return &TableBlock{}
+}
+
+// Name returns the block name "table".
+func (t *TableBlock) Name() string {
+ return "table"
+}
+
+// Args returns the block's nil arguments.
+func (t *TableBlock) Args() []string {
+ return nil
+}
+
+// WriteIndent writes the table header and contents indented by n tabs.
+func (t *TableBlock) WriteIndent(w io.Writer, n int) error {
+ if err := writeIndent(w, t.Name(), n); err != nil {
+ return err
+ }
+ for _, row := range t.rows {
+ if err := row.WriteIndent(w, n+1); err != nil {
+ return err
+ }
+ }
+ return nil
+}
+
+// Rows returns the table's rows.
+func (t *TableBlock) Rows() []Block {
+ return t.rows
+}
+
+// AppendRow adds a new row to the end of the table.
+func (t *TableBlock) AppendRow(row Block) {
+ t.rows = append(t.rows, row)
+}
+
+// parse reads the table body, i.e. the lines nested below block. "row" and
+// "header" child blocks are parsed and appended to the table; any other block
+// is discarded with parseUnknown. It returns the first error reported by the
+// parser or by a child parser.
+func (t *TableBlock) parse(p *Parser, block *TokenBlock) (err error) {
+ for err == nil {
+ // A non-empty line indented no deeper than the table header ends the table.
+ if !p.Empty() && p.Indent() <= block.Indent() {
+ break
+ }
+
+ token := p.Block()
+ if blk, ok := token.(*TokenBlock); ok {
+ var b Block
+ switch blk.Name {
+ case "row":
+ b, err = parseTableRow(p, blk)
+ case "header":
+ b, err = parseTableHeader(p, blk)
+ default:
+ err = parseUnknown(p, blk)
+ }
+ // The row is appended even when its parser returned an error.
+ if b != nil {
+ t.AppendRow(b)
+ }
+ } else if err = p.Next(); err != nil {
+ // Empty line: it was skipped, but reading the next line failed.
+ break
+ }
+ }
+ return
+}
+
+func parseContentTable(p *Parser, block *TokenBlock) (*TableBlock, error) {
+ table := NewTableBlock()
+ if err := p.Next(); err != nil {
+ return table, err
+ }
+ return table, table.parse(p, block)
+}
+
+// RowBlock represents a "row" table block.
+type RowBlock struct {
+ ContentBlock
+}
+
+// NewRowBlock creates a new RowBlock.
+func NewRowBlock() *RowBlock {
+ return &RowBlock{*NewContentBlock("row", "")}
+}
+
+func parseTableRow(p *Parser, block *TokenBlock) (*RowBlock, error) {
+ row := NewRowBlock()
+ if err := p.Next(); err != nil {
+ return row, err
+ }
+ return row, row.parse(p, block)
+}
+
+// HeaderBlock represents a "header" table block.
+type HeaderBlock struct {
+ ContentBlock
+}
+
+// NewHeaderBlock creates a new HeaderBlock.
+func NewHeaderBlock() *HeaderBlock {
+ return &HeaderBlock{*NewContentBlock("header", "")}
+}
+
+func parseTableHeader(p *Parser, block *TokenBlock) (*HeaderBlock, error) {
+ hdr := NewHeaderBlock()
+ if err := p.Next(); err != nil {
+ return hdr, err
+ }
+ return hdr, hdr.parse(p, block)
+}
+
+// EmbedBlock represents an "embed" content block.
+type EmbedBlock struct {
+ // Type is the content type (first word of block argument).
+ Type string
+
+ // URL is the content URL (second word of the block argument).
+ URL string
+
+ // Description is the content description (block body as simple text).
+ Description string
+}
+
+// Name returns the block name "embed".
+func (e *EmbedBlock) Name() string { return "embed" }
+
+// Args returns the block arguments: the type followed by the URL when both
+// are set, otherwise the type alone (a single empty string if Type is empty).
+//
+// NOTE(review): when Type is empty but URL is set, the URL is not returned,
+// whereas WriteIndent writes "*/*" as the type in that case — confirm whether
+// this difference is intended.
+func (e *EmbedBlock) Args() []string {
+ if e.Type != "" && e.URL != "" {
+ return []string{e.Type, e.URL}
+ }
+ return []string{e.Type}
+}
+
+// WriteIndent writes the embed block header and contents indented by n tabs.
+//
+// Nothing is written when URL is empty. An empty Type is written as "*/*".
+// The escaped description is always written on its own line, one level deeper
+// than the header, even when it is empty.
+func (e *EmbedBlock) WriteIndent(w io.Writer, n int) error {
+ // An embed without a URL is silently omitted from the output.
+ if e.URL == "" {
+ return nil
+ }
+
+ s := e.Name() + " "
+ if e.Type == "" {
+ s += "*/*"
+ } else {
+ s += Escape(e.Type)
+ }
+ s += " " + Escape(e.URL)
+ if err := writeIndent(w, s, n); err != nil {
+ return err
+ }
+ if err := writeIndent(w, Escape(e.Description), n+1); err != nil {
+ return err
+ }
+ return nil
+}
+
+// parseContentEmbed parses an "embed" block: the first block argument is the
+// content type, the second the URL, and the block body is read as simple text
+// into the description.
+func parseContentEmbed(p *Parser, block *TokenBlock) (*EmbedBlock, error) {
+	embed := new(EmbedBlock)
+	switch args := block.Args; {
+	case len(args) >= 2:
+		embed.Type, embed.URL = args[0], args[1]
+	case len(args) == 1:
+		embed.Type = args[0]
+	}
+
+	if err := p.Next(); err != nil {
+		return embed, err
+	}
+
+	var err error
+	embed.Description, err = getSimpleText(p, block)
+	return embed, err
+}
diff --git a/document.go b/document.go
new file mode 100644
index 0000000..b5cdbe5
--- /dev/null
+++ b/document.go
@@ -0,0 +1,278 @@
+// Package cnm implements CNM document parsing and composition.
+package cnm // import "contnet.org/lib/cnm-go"
+
+import (
+ "bufio"
+ "io"
+ "path"
+ "strings"
+)
+
+// Document represents a CNM document.
+type Document struct {
+ // Title is the document title (top-level "title" block).
+ Title string
+
+ // Links is a list of document-level hyperlinks (top-level "links" block).
+ Links []Link
+
+ // Site is a sitemap (top-level "site" block).
+ Site Site
+
+ // Content is the document content (top-level "content" block).
+ Content *ContentBlock
+}
+
+// ParseDocument parses a CNM document from r.
+//
+// The top-level blocks "title", "links", "site" and "content" are parsed into
+// the corresponding Document fields; the body of any other top-level block is
+// discarded. Reaching the end of the input is not an error: io.EOF is reported
+// as nil. The document parsed so far is returned even when err is not nil.
+func ParseDocument(r io.Reader) (doc *Document, err error) {
+ p := NewParser(r)
+ doc = &Document{}
+ err = p.Next()
+ for err == nil {
+ // Tokenize the current line first, then advance, so that each block
+ // parser below starts on the first line after its header.
+ token := p.Block()
+ if err = p.Next(); err != nil {
+ break
+ }
+ if blk, ok := token.(*TokenBlock); ok {
+ switch blk.Name {
+ case "title":
+ err = doc.parseTitle(p, blk)
+ case "links":
+ err = doc.parseLinks(p, blk)
+ case "site":
+ err = doc.Site.parse(p, blk)
+ case "content":
+ // Repeated "content" blocks are parsed into the same ContentBlock.
+ if doc.Content == nil {
+ doc.Content = &ContentBlock{name: "content"}
+ }
+ err = doc.Content.parse(p, blk)
+ default:
+ // discard lines inside this block
+ for err == nil {
+ if !p.Empty() && p.Indent() <= blk.Indent() {
+ break
+ }
+ err = p.Next()
+ }
+ }
+ }
+ }
+ // Running out of input is the normal way for parsing to finish.
+ if err == io.EOF {
+ err = nil
+ }
+ return
+}
+
+// Write writes the document to w in CNM form: the "title", "links", "site"
+// and "content" top-level blocks, in that order. A block is omitted when the
+// corresponding field is empty (or nil, for Content).
+//
+// Output is buffered and flushed at the end; on a write error the function
+// returns early without flushing.
+func (doc *Document) Write(w io.Writer) error {
+ bw := bufio.NewWriter(w)
+ if doc.Title != "" {
+ if err := writeIndent(bw, "title", 0); err != nil {
+ return err
+ }
+ if err := writeIndent(bw, Escape(doc.Title), 1); err != nil {
+ return err
+ }
+ }
+ if len(doc.Links) > 0 {
+ if err := writeIndent(bw, "links", 0); err != nil {
+ return err
+ }
+ for _, link := range doc.Links {
+ if err := link.WriteIndent(bw, 1); err != nil {
+ return err
+ }
+ }
+ }
+ // Only the children of the root Site node are written; the root itself
+ // stands for the "site" block.
+ if len(doc.Site.Children) > 0 {
+ if err := writeIndent(bw, "site", 0); err != nil {
+ return err
+ }
+ for _, site := range doc.Site.Children {
+ if err := site.WriteIndent(bw, 1); err != nil {
+ return err
+ }
+ }
+ }
+ if doc.Content != nil {
+ if err := doc.Content.WriteIndent(bw, 0); err != nil {
+ return err
+ }
+ }
+ return bw.Flush()
+}
+
+// parseTitle reads the body of a "title" block as simple text. The first
+// title block sets doc.Title; the text of every further title block is
+// appended to it, separated by a single space.
+func (doc *Document) parseTitle(p *Parser, block *TokenBlock) (err error) {
+	var text string
+	text, err = getSimpleText(p, block)
+	if doc.Title != "" {
+		text = doc.Title + " " + text
+	}
+	doc.Title = text
+	return err
+}
+
+// parseLinks reads the entries of a "links" block and appends them to
+// doc.Links. For every entry the block name is the URL, the joined block
+// arguments are the link name, and the entry's body, read as simple text, is
+// the description. Entries with an empty name are discarded with
+// parseUnknown.
+//
+// It returns the first error reported by the parser (including io.EOF at the
+// end of the input).
+func (doc *Document) parseLinks(p *Parser, block *TokenBlock) (err error) {
+	for err == nil {
+		// A non-empty line indented no deeper than the "links" header ends
+		// the block.
+		if !p.Empty() && p.Indent() <= block.Indent() {
+			break
+		}
+
+		blk, ok := p.Block().(*TokenBlock)
+		if !ok {
+			// Empty line: step over it. Without advancing here the loop
+			// would examine the same line forever.
+			err = p.Next()
+			continue
+		}
+		if blk.Name == "" {
+			err = parseUnknown(p, blk)
+			continue
+		}
+
+		doc.Links = append(doc.Links, Link{
+			URL:  blk.Name,
+			Name: strings.Join(blk.Args, " "),
+		})
+		if err = p.Next(); err != nil {
+			break
+		}
+		doc.Links[len(doc.Links)-1].Description, err = getSimpleText(p, blk)
+	}
+	return err
+}
+
+// getSimpleText reads the lines nested below block as simple text and returns
+// the non-empty ones joined by single spaces, together with the first error
+// reported by the parser.
+func getSimpleText(p *Parser, block *TokenBlock) (s string, err error) {
+	var parts []string
+	for err == nil {
+		// A non-empty line indented no deeper than the header ends the body.
+		if !p.Empty() && p.Indent() <= block.Indent() {
+			break
+		}
+
+		if text, ok := p.SimpleText().(*TokenSimpleText); ok && text.Text != "" {
+			parts = append(parts, text.Text)
+		}
+
+		err = p.Next()
+	}
+	return strings.Join(parts, " "), err
+}
+
+// Link represents a document-level hyperlink in the "links" top-level block.
+type Link struct {
+ // URL is the hyperlink URL.
+ URL string
+
+ // Name is the hyperlink text.
+ Name string
+
+ // Description is the description of the hyperlink.
+ Description string
+}
+
+// WriteIndent writes the escaped link URL and, if set, the escaped name on one
+// line indented by n tabs. A non-empty description follows on the next line,
+// one level deeper.
+func (link Link) WriteIndent(w io.Writer, n int) error {
+	header := Escape(link.URL)
+	if link.Name != "" {
+		header += " " + Escape(link.Name)
+	}
+	if err := writeIndent(w, header, n); err != nil {
+		return err
+	}
+	if link.Description == "" {
+		return nil
+	}
+	return writeIndent(w, Escape(link.Description), n+1)
+}
+
+// Site represents a node in the sitemap in the "site" top-level block.
+type Site struct {
+ // Path is the node's path fragment.
+ Path string
+
+ // Name is the node's name.
+ Name string
+
+ // Children are the nodes below this node.
+ Children []Site
+}
+
+// WriteIndent writes the sitemap indented by n tabs.
+func (site Site) WriteIndent(w io.Writer, n int) error {
+ s := Escape(site.Path)
+ if site.Name != "" {
+ s += " " + Escape(site.Name)
+ }
+ if err := writeIndent(w, s, n); err != nil {
+ return err
+ }
+ for _, ch := range site.Children {
+ if err := ch.WriteIndent(w, n+1); err != nil {
+ return err
+ }
+ }
+ return nil
+}
+
+// parse reads the sitemap entries nested below block and appends them to
+// site.Children, recursing into each entry for the levels below it. For every
+// entry the block name, cleaned with path.Clean and stripped of leading and
+// trailing slashes, is the path, and the joined block arguments are the name.
+// Entries with an empty name are discarded with parseUnknown.
+//
+// It returns the first error reported by the parser.
+func (site *Site) parse(p *Parser, block *TokenBlock) (err error) {
+ for err == nil {
+ // A non-empty line indented no deeper than the header ends this level.
+ if !p.Empty() && p.Indent() <= block.Indent() {
+ break
+ }
+
+ token := p.Block()
+ if blk, ok := token.(*TokenBlock); ok {
+ if blk.Name == "" {
+ err = parseUnknown(p, blk)
+ } else {
+ s := Site{
+ Path: strings.Trim(path.Clean(blk.Name), "/"),
+ Name: strings.Join(blk.Args, " "),
+ }
+ site.Children = append(site.Children, s)
+ if err = p.Next(); err != nil {
+ break
+ }
+ // Parse into the appended element, not the local copy s.
+ err = site.Children[len(site.Children)-1].parse(p, blk)
+ }
+ } else {
+ // Empty line: skip it.
+ err = p.Next()
+ }
+ }
+ return
+}
+
+// parseUnknown skips the header line of block and discards every line nested
+// below it. It stops at the first non-empty line indented no deeper than the
+// block itself, or at the first parser error, which it returns.
+func parseUnknown(p *Parser, block *TokenBlock) (err error) {
+	for err = p.Next(); err == nil; err = p.Next() {
+		if !p.Empty() && p.Indent() <= block.Indent() {
+			break
+		}
+	}
+	return err
+}
+
+// writeIndent writes s to w as one line, preceded by depth tab characters and
+// terminated by a line feed. An empty s produces a bare line feed with no
+// indentation at all.
+func writeIndent(w io.Writer, s string, depth int) error {
+	// Pre-built run of tabs so that common depths need no allocation.
+	const tabs = "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t"
+
+	var line string
+	switch {
+	case s == "":
+		line = "\n"
+	case depth == 0:
+		line = s + "\n"
+	case depth <= len(tabs):
+		line = tabs[:depth] + s + "\n"
+	default:
+		line = strings.Repeat("\t", depth) + s + "\n"
+	}
+	_, err := w.Write([]byte(line))
+	return err
+}
diff --git a/parse.go b/parse.go
new file mode 100644
index 0000000..02f7eb2
--- /dev/null
+++ b/parse.go
@@ -0,0 +1,189 @@
+package cnm
+
+import (
+ "bufio"
+ "io"
+)
+
+// TopLevel represents the top-level block.
+var TopLevel = &TokenBlock{
+ TokenLine: TokenLine{
+ Indentation: -1,
+ RawLine: "",
+ LineNo: 0,
+ },
+ Parent: nil,
+ Name: "",
+ Args: nil,
+}
+
+// Parser parses a CNM document by lines.
+type Parser struct {
+ // r is the buffered source of the document text.
+ r *bufio.Reader
+ // line counts the calls to nextLine; it is reported by Line.
+ line int
+ // block is the block most recently opened by Block, or one of its
+ // ancestors once Next has seen a non-empty line that is not nested in it.
+ block *TokenBlock
+ // current is the line most recently read by Next; nil before the first call.
+ current *TokenLine
+ // end is set once the extra empty line at the end of input was produced.
+ end bool
+}
+
+// NewParser creates a new Parser that reads from r.
+func NewParser(r io.Reader) *Parser {
+ return &Parser{
+ r: bufio.NewReader(r),
+ line: 0,
+ block: TopLevel,
+ current: nil,
+ end: false,
+ }
+}
+
+// Line returns the number of the last parsed line in the document, starting
+// with 1 after the first line.
+func (p *Parser) Line() int {
+ return p.line
+}
+
+// Next retrieves the next line.
+//
+// The line's indentation is the number of leading tabs, capped at one level
+// deeper than the current block. If the new line is not empty and is indented
+// no deeper than the current block, the current block is replaced by its
+// parent. If reading the line fails, the error is returned and the current
+// line is left unchanged.
+func (p *Parser) Next() error {
+ line, err := p.nextLine()
+ if err != nil {
+ return err
+ }
+ // Count the leading tabs.
+ indent := 0
+ for _, c := range line {
+ if c != '\t' {
+ break
+ }
+ indent++
+ }
+ // Tabs beyond the allowed depth remain part of the line's content.
+ if indent > p.block.Indent()+1 {
+ indent = p.block.Indent() + 1
+ }
+ p.current = &TokenLine{
+ Indentation: indent,
+ RawLine: line,
+ LineNo: p.line,
+ }
+ // Leave the current block (one level only) when the line is not nested in it.
+ if p.current.Indent() <= p.block.Indent() && !p.Empty() {
+ p.block = p.block.Parent
+ }
+ return nil
+}
+
+// Indent returns the indentation of the current line.
+//
+// Returns -1 if no line has been read yet.
+func (p *Parser) Indent() int {
+ if p.current == nil {
+ return -1
+ }
+ return p.current.Indent()
+}
+
+// Empty reports whether the current line is empty, i.e. whether it consists
+// of nothing but its indentation. It also returns true when no line has been
+// read yet.
+func (p *Parser) Empty() bool {
+	return p.current == nil || p.current.Indent() == len(p.current.Raw())
+}
+
+// Block parses the current line in block mode.
+//
+// Returns a TokenBlock if the line was not empty, otherwise TokenEmptyLine. In
+// block mode, a line is empty even if its indentation exceeds the block
+// content indentation, as long as it only contains tab characters.
+//
+// The first word of the line becomes the block name and the remaining words
+// its arguments. A returned TokenBlock also becomes the parser's current
+// block, with the previous current block as its parent.
+//
+// Next() must have been called before calling Block().
+func (p *Parser) Block() Token {
+ // Strip the (capped) indentation; any further tabs stay in line.
+ line := p.current.Raw()[p.current.Indent():]
+
+ // Disabled: explicit check for lines that consist of tabs only.
+ /*indent := 0
+ for _, c := range line {
+ if c != '\t' {
+ break
+ }
+ indent++
+ }
+ if len(line) == indent {
+ return &TokenEmptyLine{*p.current}
+ }*/
+
+ ss := SplitUnescape(line)
+ if len(ss) == 0 || len(ss) == 1 && ss[0] == "" {
+ return &TokenEmptyLine{*p.current}
+ }
+
+ block := TokenBlock{
+ TokenLine: *p.current,
+ Parent: p.block,
+ }
+ block.Name = ss[0]
+ if len(ss) > 1 {
+ block.Args = ss[1:]
+ }
+
+ // Lines read from now on are measured against this block.
+ p.block = &block
+
+ return &block
+}
+
+// RawText parses the current line as raw text.
+//
+// Returns a TokenRawText if the line was not empty, otherwise
+// TokenEmptyLine.
+//
+// Next() must have been called before calling RawText().
+func (p *Parser) RawText() Token {
+ if p.Empty() {
+ return &TokenEmptyLine{*p.current}
+ }
+ return &TokenRawText{
+ TokenLine: *p.current,
+ Text: p.current.Raw()[p.current.Indent():],
+ }
+}
+
+// SimpleText parses the current line as simple text.
+//
+// Returns a TokenSimpleText if the line was not empty, otherwise
+// TokenEmptyLine.
+//
+// Next() must have been called before calling SimpleText().
+func (p *Parser) SimpleText() Token {
+ if p.Empty() {
+ return &TokenEmptyLine{*p.current}
+ }
+ return &TokenSimpleText{
+ TokenLine: *p.current,
+ Text: ParseSimpleText(p.current.Raw()[p.current.Indent():]),
+ }
+}
+
+// nextLine reads one line from the input and returns it with every line feed,
+// carriage return and U+0000 character removed.
+//
+// A final line without a trailing line feed is returned without error. Once
+// the input is exhausted, one extra empty line is returned (at most once per
+// parser) before io.EOF is reported. The line counter is incremented on every
+// call, including calls that return an error.
+func (p *Parser) nextLine() (string, error) {
+ l, err := p.r.ReadString('\n')
+ if err == io.EOF {
+ if l != "" {
+ // Last line of the input, not terminated by a line feed.
+ err = nil
+ } else if !p.end { // XXX
+ // Produce a single synthetic empty line before reporting EOF.
+ l = "\n"
+ p.end = true
+ err = nil
+ }
+ }
+ // Copy the runes over, dropping the characters that never belong to a line.
+ rs := make([]rune, len(l))
+ ri := 0
+ for _, r := range l {
+ switch r {
+ case '\n', '\r', '\x00':
+ continue
+ }
+ rs[ri] = r
+ ri++
+ }
+ p.line++
+ return string(rs[:ri]), err
+}
diff --git a/parse_test.go b/parse_test.go
new file mode 100644
index 0000000..b47dc14
--- /dev/null
+++ b/parse_test.go
@@ -0,0 +1,701 @@
+package cnm
+
+import (
+ "reflect"
+ "strings"
+ "testing"
+
+ "github.com/davecgh/go-spew/spew"
+)
+
+var parseTests = map[string]*Document{
+ "": &Document{},
+
+ "foo\n\tbar\ntitle\n\ttest": &Document{
+ Title: "test",
+ },
+
+ "foo\n\tbar\ntitle\n\ttest\nfoo\n\tbar": &Document{
+ Title: "test",
+ },
+
+ "title\n\ttest": &Document{
+ Title: "test",
+ },
+
+ "\ntitle\n\ttest\n": &Document{
+ Title: "test",
+ },
+
+ "title\n\ttest\n\n": &Document{
+ Title: "test",
+ },
+
+ "\ntitle\n\t\t\t\t\t\n\t\tfoo bar": &Document{
+ Title: "foo bar",
+ },
+
+ "site\n\tfoo\nsite\n\t\tbar\n": &Document{
+ Site: Site{Children: []Site{
+ Site{Path: "foo"},
+ }},
+ },
+
+ "content\n\ttext\n\t\tfoo\ncontent\n\t\tbar\n": &Document{
+ Content: &ContentBlock{
+ name: "content",
+ children: []Block{
+ &TextBlock{
+ Format: "",
+ Contents: TextPlainContents{
+ Paragraphs: []string{"foo"},
+ },
+ },
+ },
+ },
+ },
+
+ "\n\ttitle\n\t\t\t\t\t\n\tfoo bar": &Document{},
+
+ "\ttitle\n\t\tfoo\n": &Document{},
+
+ "\tsite\n\t\tfoo\n": &Document{},
+
+ "\tlinks\n\t\tfoo\n": &Document{},
+
+ "links\n\tfoo": &Document{
+ Links: []Link{
+ Link{
+ URL: "foo",
+ },
+ },
+ },
+
+ "qwe\ntitle\n\tasd": &Document{
+ Title: "asd",
+ },
+
+ "links\n\t qwe\n\tasd": &Document{
+ Links: []Link{
+ Link{URL: "asd"},
+ },
+ },
+
+ "site\n\t qwe\n\tasd": &Document{
+ Site: Site{Children: []Site{
+ Site{Path: "asd"},
+ }},
+ },
+
+ "site\n\tba\\nr": &Document{
+ Site: Site{
+ Children: []Site{
+ Site{
+ Path: "ba\nr",
+ },
+ },
+ },
+ },
+
+ "site\n\t\t\t\tba\\nr": &Document{},
+
+ "site\n\tfoo\tbar": &Document{
+ Site: Site{
+ Children: []Site{
+ Site{
+ Path: "foo",
+ Name: "bar",
+ },
+ },
+ },
+ },
+
+ "\t\tsite\n\t\t\t\tfoo": &Document{},
+
+ "\tsite\n\tbar": &Document{},
+
+ "content\n\tsection test\n": &Document{
+ Content: &ContentBlock{
+ name: "content",
+ args: nil,
+ children: []Block{
+ &SectionBlock{ContentBlock{
+ name: "section",
+ args: []string{"test"},
+ }},
+ },
+ },
+ },
+
+ "content\n\tnosuchblock\n\tsection test\n": &Document{
+ Content: &ContentBlock{
+ name: "content",
+ args: nil,
+ children: []Block{
+ &SectionBlock{ContentBlock{
+ name: "section",
+ args: []string{"test"},
+ }},
+ },
+ },
+ },
+
+ "content\n\tnosuchblock\n\tsection test\n\n\tnosuchblock2": &Document{
+ Content: &ContentBlock{
+ name: "content",
+ args: nil,
+ children: []Block{
+ &SectionBlock{ContentBlock{
+ name: "section",
+ args: []string{"test"},
+ }},
+ },
+ },
+ },
+
+ "content\n\tsection\n\t\tnosuchblock\n\t\tsection\n\t\t\ttext\n\t\t\t\ttest": &Document{
+ Content: &ContentBlock{
+ name: "content",
+ args: nil,
+ children: []Block{
+ &SectionBlock{ContentBlock{
+ name: "section",
+ args: nil,
+ children: []Block{
+ &SectionBlock{ContentBlock{
+ name: "section",
+ args: nil,
+ children: []Block{
+ &TextBlock{
+ Format: "",
+ Contents: TextPlainContents{Paragraphs: []string{
+ "test",
+ }},
+ },
+ },
+ }},
+ },
+ }},
+ },
+ },
+ },
+
+ "content\n\ttable\n\t\tnosuchblock\n\t\trow\n\t\t\ttext\n\t\t\t\ttest": &Document{
+ Content: &ContentBlock{
+ name: "content",
+ args: nil,
+ children: []Block{
+ &TableBlock{rows: []Block{
+ &RowBlock{ContentBlock{
+ name: "row",
+ args: nil,
+ children: []Block{
+ &TextBlock{
+ Format: "",
+ Contents: TextPlainContents{Paragraphs: []string{
+ "test",
+ }},
+ },
+ },
+ }},
+ }},
+ },
+ },
+ },
+
+ "site\n\t\ttest\n\tfoo\\ bar baz\n" +
+ "links\n\tfoo\\ bar baz\n\t\t\tquux\n" +
+ "content\n" +
+ "links\n\t\ttest\n" +
+ "content\n" +
+ "\tsection\n\tsection qweasd\n" +
+ "\tsection foo\\ bar baz\n\t\ttext\n\t\t\ttest\n": &Document{
+ Site: Site{
+ Children: []Site{
+ Site{
+ Path: "foo bar",
+ Name: "baz",
+ },
+ },
+ },
+ Links: []Link{
+ Link{
+ URL: "foo bar",
+ Name: "baz",
+ Description: "quux",
+ },
+ },
+ Content: &ContentBlock{
+ name: "content",
+ args: nil,
+ children: []Block{
+ &SectionBlock{ContentBlock{
+ name: "section",
+ args: nil,
+ }},
+ &SectionBlock{ContentBlock{
+ name: "section",
+ args: []string{"qweasd"},
+ }},
+ &SectionBlock{ContentBlock{
+ name: "section",
+ args: []string{"foo bar baz"},
+ children: []Block{
+ &TextBlock{
+ Contents: TextPlainContents{[]string{
+ "test",
+ }},
+ },
+ },
+ }},
+ },
+ },
+ },
+
+ "title\n\tfoo bar\n" +
+ "links\n\tqwe asd\n\tzxc 123\n" +
+ "site\n\tfoo\n\t\tbar\n\t\tbaz/quux\n\t\t\t123\n" +
+ "title\n\tbaz\n" +
+ "links\n\tfoo\n" +
+ "site\n\ttest": &Document{
+ Title: "foo bar baz",
+ Links: []Link{
+ Link{"qwe", "asd", ""},
+ Link{"zxc", "123", ""},
+ Link{"foo", "", ""},
+ },
+ Site: Site{
+ Children: []Site{
+ Site{
+ Path: "foo",
+ Children: []Site{
+ Site{Path: "bar"},
+ Site{
+ Path: "baz/quux",
+ Children: []Site{
+ Site{Path: "123"},
+ },
+ },
+ },
+ },
+ Site{Path: "test"},
+ },
+ },
+ },
+
+ `
+thing stuff
+ whatever
+title blah
+
+
+ Test
+
+title
+ document
+
+content
+ section Test section
+ text
+ This is \n just a
+ text pre
+
+ t e \n s t
+
+
+
+ preformatted text
+
+ raw text/plain
+ of various \n features
+
+ section of\ the
+ table
+ header
+ text
+ Column 1
+ text
+ Column 2
+ row
+ text
+ CNM
+ text
+ document
+
+ format
+ row
+ section
+ lorem
+ list
+ text
+ ipsum
+ list ordered
+ list unordered
+ text
+ dolor
+
+ sit
+ amet
+ embed text/cnm cnp://example.com/ stuff
+ thing
+ whatever
+`: &Document{
+ Title: "Test document",
+ Content: &ContentBlock{
+ name: "content",
+ args: nil,
+ children: []Block{
+ &SectionBlock{ContentBlock{
+ name: "section",
+ args: []string{"Test section"},
+ children: []Block{
+ &TextBlock{
+ Contents: TextPlainContents{[]string{
+ "This is \n just a",
+ }},
+ },
+ &TextBlock{
+ Format: "pre",
+ Contents: TextPreContents{
+ " t e \n s t \n\t\n\n\npreformatted text",
+ },
+ },
+ &RawBlock{
+ Syntax: "text/plain",
+ Contents: "of various \\n features",
+ },
+ &SectionBlock{ContentBlock{
+ name: "section",
+ args: []string{"of the"},
+ children: []Block{
+ &TableBlock{[]Block{
+ &HeaderBlock{ContentBlock{
+ name: "header",
+ args: nil,
+ children: []Block{
+ &TextBlock{
+ Contents: TextPlainContents{[]string{
+ "Column 1",
+ }},
+ },
+ &TextBlock{
+ Contents: TextPlainContents{[]string{
+ "Column 2",
+ }},
+ },
+ },
+ }},
+ &RowBlock{ContentBlock{
+ name: "row",
+ args: nil,
+ children: []Block{
+ &TextBlock{
+ Contents: TextPlainContents{[]string{
+ "CNM",
+ }},
+ },
+ &TextBlock{
+ Contents: TextPlainContents{[]string{
+ "document",
+ "format",
+ }},
+ },
+ },
+ }},
+ &RowBlock{ContentBlock{
+ name: "row",
+ args: nil,
+ children: []Block{
+ &SectionBlock{ContentBlock{
+ name: "section",
+ args: nil,
+ }},
+ &ListBlock{ContentBlock{
+ name: "list",
+ args: nil,
+ children: []Block{
+ &TextBlock{
+ Contents: TextPlainContents{[]string{
+ "ipsum",
+ }},
+ },
+ &ListBlock{ContentBlock{
+ name: "list",
+ args: []string{"ordered"},
+ children: []Block{
+ &ListBlock{ContentBlock{
+ name: "list",
+ args: []string{"unordered"},
+ children: []Block{
+ &TextBlock{
+ Contents: TextPlainContents{[]string{
+ "dolor",
+ "sit amet",
+ }},
+ },
+ },
+ }},
+ },
+ }},
+ },
+ }},
+ },
+ }},
+ }},
+ },
+ }},
+ },
+ }},
+ &EmbedBlock{
+ Type: "text/cnm",
+ URL: "cnp://example.com/",
+ Description: "thing whatever",
+ },
+ },
+ },
+ },
+}
+
+func TestParse(t *testing.T) {
+ for k, v := range parseTests {
+ t.Run(k, func(t *testing.T) {
+ d, err := ParseDocument(strings.NewReader(k))
+ if err != nil {
+ t.Fatalf("ParseDocument(%q): error: %v", k, err)
+ }
+ if !documentEqual(d, v) {
+ t.Fatalf("ParseDocument(%q):\nexpected:\n%s\n got:\n%s", k, reprDoc(v), reprDoc(d))
+ }
+ })
+ }
+}
+
+func reprDoc(d *Document) string {
+ //return fmt.Sprintf("Document{Title: %q, Links: %+v, Site: %+v, Content: %s}", d.Title, d.Links, d.Site, reprContent(d.Content))
+ return spew.Sdump(d)
+}
+
+// documentEqual reports whether two documents have the same title, the same
+// links in the same order, the same sitemap and the same content tree.
+func documentEqual(a, b *Document) bool {
+	if a.Title != b.Title || len(a.Links) != len(b.Links) {
+		return false
+	}
+	for i, link := range a.Links {
+		if !linkEqual(link, b.Links[i]) {
+			return false
+		}
+	}
+	return siteEqual(a.Site, b.Site) && contentBlockEqual(a.Content, b.Content)
+}
+
+func linkEqual(a, b Link) bool {
+ return a == b
+}
+
+// siteEqual reports whether two sitemap nodes have the same path and name and,
+// recursively, equal children in the same order.
+func siteEqual(a, b Site) bool {
+	if a.Path != b.Path || a.Name != b.Name || len(a.Children) != len(b.Children) {
+		return false
+	}
+	for i, child := range a.Children {
+		if !siteEqual(child, b.Children[i]) {
+			return false
+		}
+	}
+	return true
+}
+
+func blockEqual(a, b Block) bool {
+ switch va := a.(type) {
+ case *SectionBlock:
+ vb, ok := b.(*SectionBlock)
+ if !ok {
+ return false
+ }
+ return sectionBlockEqual(va, vb)
+
+ case *TextBlock:
+ vb, ok := b.(*TextBlock)
+ if !ok {
+ return false
+ }
+ return textBlockEqual(va, vb)
+
+ case *RawBlock:
+ vb, ok := b.(*RawBlock)
+ if !ok {
+ return false
+ }
+ return rawBlockEqual(va, vb)
+
+ case *ListBlock:
+ vb, ok := b.(*ListBlock)
+ if !ok {
+ return false
+ }
+ return listBlockEqual(va, vb)
+
+ case *TableBlock:
+ vb, ok := b.(*TableBlock)
+ if !ok {
+ return false
+ }
+ return tableBlockEqual(va, vb)
+
+ case *HeaderBlock:
+ vb, ok := b.(*HeaderBlock)
+ if !ok {
+ return false
+ }
+ return headerBlockEqual(va, vb)
+
+ case *RowBlock:
+ vb, ok := b.(*RowBlock)
+ if !ok {
+ return false
+ }
+ return rowBlockEqual(va, vb)
+
+ case *EmbedBlock:
+ vb, ok := b.(*EmbedBlock)
+ if !ok {
+ return false
+ }
+ return embedBlockEqual(va, vb)
+
+ case *ContentBlock:
+ vb, ok := b.(*ContentBlock)
+ if !ok {
+ return false
+ }
+ return contentBlockEqual(va, vb)
+
+ default: // shouldn't happen
+ return false
+ }
+}
+
+func contentBlockEqual(a, b *ContentBlock) bool {
+ if (a == nil) != (b == nil) {
+ return false
+ }
+ if a == nil {
+ return true
+ }
+ if a.Name() != b.Name() {
+ return false
+ }
+ aa, ba := a.Args(), b.Args()
+ if len(aa) != len(ba) {
+ return false
+ }
+ for i := range aa {
+ if aa[i] != ba[i] {
+ return false
+ }
+ }
+ ca, cb := a.Children(), b.Children()
+ if len(ca) != len(cb) {
+ return false
+ }
+ for i := range ca {
+ if !blockEqual(ca[i], cb[i]) {
+ return false
+ }
+ }
+ return true
+}
+
+func sectionBlockEqual(a, b *SectionBlock) bool {
+ return contentBlockEqual(&a.ContentBlock, &b.ContentBlock)
+}
+
+func textBlockEqual(a, b *TextBlock) bool {
+ if a.Format != b.Format {
+ return false
+ }
+ return textContentsEqual(a.Contents, b.Contents)
+}
+
+func textContentsEqual(a, b TextContents) bool {
+ switch va := a.(type) {
+ case TextPlainContents:
+ vb, ok := b.(TextPlainContents)
+ if !ok {
+ return false
+ }
+ return textPlainContentsEqual(va, vb)
+
+ case TextPreContents:
+ vb, ok := b.(TextPreContents)
+ if !ok {
+ return false
+ }
+ return textPreContentsEqual(va, vb)
+
+ default:
+ return reflect.TypeOf(a) == reflect.TypeOf(b) && reflect.DeepEqual(a, b)
+ }
+}
+
+// textPlainContentsEqual reports whether both values hold the same paragraphs
+// in the same order. A nil and an empty paragraph list compare equal.
+func textPlainContentsEqual(a, b TextPlainContents) bool {
+	pa, pb := a.Paragraphs, b.Paragraphs
+	if len(pa) != len(pb) {
+		return false
+	}
+	for i, para := range pa {
+		if para != pb[i] {
+			return false
+		}
+	}
+	return true
+}
+
+func textPreContentsEqual(a, b TextPreContents) bool {
+ return a == b
+}
+
+func rawBlockEqual(a, b *RawBlock) bool {
+ return *a == *b
+}
+
+func listBlockEqual(a, b *ListBlock) bool {
+ return contentBlockEqual(&a.ContentBlock, &b.ContentBlock)
+}
+
+func tableBlockEqual(a, b *TableBlock) bool {
+ ra, rb := a.Rows(), b.Rows()
+ if len(ra) != len(rb) {
+ return false
+ }
+ for i := range ra {
+ if !blockEqual(ra[i], rb[i]) {
+ return false
+ }
+ }
+ return true
+}
+
+func rowBlockEqual(a, b *RowBlock) bool {
+ return contentBlockEqual(&a.ContentBlock, &b.ContentBlock)
+}
+
+func headerBlockEqual(a, b *HeaderBlock) bool {
+ return contentBlockEqual(&a.ContentBlock, &b.ContentBlock)
+}
+
+func embedBlockEqual(a, b *EmbedBlock) bool {
+ return *a == *b
+}
diff --git a/simpletext.go b/simpletext.go
new file mode 100644
index 0000000..78e089e
--- /dev/null
+++ b/simpletext.go
@@ -0,0 +1,196 @@
+package cnm
+
+import (
+ "bytes"
+ "regexp"
+ "strconv"
+ "strings"
+)
+
+// ParseSimpleText parses raw as simple text (collapses whitespace and resolves
+// escape sequences).
+func ParseSimpleText(raw string) string {
+ return Unescape(CollapseWhitespace(raw))
+}
+
+// CollapseWhitespace collapses sequences of non-escaped whitespace in raw CNM
+// simple text into single spaces.
+//
+// Leading and trailing non-escaped whitespace is removed. Escape sequences are
+// left in place rather than resolved, so a backslash-escaped space is always
+// preserved.
+func CollapseWhitespace(raw string) string {
+ // First pass: treat every run of tabs, line feeds, form feeds and carriage
+ // returns as a separator and rejoin the pieces with single spaces.
+ s := strings.Join(strings.FieldsFunc(raw, func(r rune) bool {
+ switch r {
+ case '\t', '\n', '\f', '\r':
+ return true
+ }
+ return false
+ }), " ")
+
+ // Second pass: collapse runs of spaces while honouring backslash escapes.
+ var buf bytes.Buffer
+ first := true // nothing but plain spaces seen so far; leading spaces are dropped
+ escape := false // the previous rune was a backslash that is still pending
+ space := false // one or more plain spaces are waiting to be written
+ for _, r := range s {
+ switch r {
+ case '\\':
+ if escape {
+ // Second backslash of an escaped backslash.
+ buf.WriteString("\\\\")
+ escape = false
+ } else {
+ escape = true
+ }
+ if space && !first {
+ buf.WriteByte(' ')
+ }
+ space = false
+ first = false
+ case ' ':
+ if escape {
+ // Escaped space: kept verbatim, never collapsed.
+ buf.WriteString("\\ ")
+ escape = false
+ } else {
+ space = true
+ }
+ default:
+ if escape {
+ // Pending backslash belongs to this rune; write it back.
+ buf.WriteByte('\\')
+ }
+ if space && !first {
+ buf.WriteByte(' ')
+ }
+ buf.WriteRune(r)
+ escape = false
+ space = false
+ first = false
+ }
+ }
+ // A lone backslash at the very end is kept. Pending spaces are not
+ // written, which trims trailing whitespace.
+ if escape {
+ buf.WriteByte('\\')
+ }
+ return buf.String()
+}
+
+// escapeRe matches everything Escape has to rewrite: tab, line feed, form
+// feed, carriage return, backslash, U+0000, a space at the very start or end
+// of the string, and a pair of consecutive spaces (the last alternative is
+// exactly two spaces).
+var escapeRe = regexp.MustCompile(`[\t\n\f\r\\\x00]|^ | $|  `)
+
+// Escape escapes whitespace, backslash and U+0000 within s.
+//
+// A single space between two other characters is left alone. Spaces are only
+// escaped where they would otherwise be lost: at the start or end of s, and
+// in runs of two or more, where every second space is escaped.
+func Escape(s string) string {
+	return escapeRe.ReplaceAllStringFunc(s, func(match string) string {
+		switch match {
+		case "\t":
+			return `\t`
+		case "\n":
+			return `\n`
+		case "\f":
+			return `\f`
+		case "\r":
+			return `\r`
+		case "\\":
+			return `\\`
+		case "\x00":
+			return `\x00`
+		case " ": // one space at the start or end of s
+			return `\ `
+		case "  ": // two consecutive spaces: keep the first, escape the second
+			return ` \ `
+		}
+		return match // this shouldn't happen
+	})
+}
+
+// escapeSpaceRe matches tab, line feed, a space at the very start or end of
+// the string, and a pair of consecutive spaces (the last alternative is
+// exactly two spaces).
+var escapeSpaceRe = regexp.MustCompile(`[\t\n]|^ | $|  `)
+
+// EscapeSpace works like Escape, except it only escapes spaces, tabs and line
+// feeds.
+//
+// A single space between two other characters is left alone. Spaces are only
+// escaped at the start or end of s and in runs of two or more.
+func EscapeSpace(s string) string {
+	return escapeSpaceRe.ReplaceAllStringFunc(s, func(match string) string {
+		switch match {
+		case "\t":
+			return `\t`
+		case "\n":
+			return `\n`
+		case " ": // one space at the start or end of s
+			return `\ `
+		case "  ": // two consecutive spaces: keep the first, escape the second
+			return ` \ `
+		}
+		return match // this shouldn't happen
+	})
+}
+
+// escapeNonspaceRe matches form feed, carriage return, backslash and U+0000.
+var escapeNonspaceRe = regexp.MustCompile(`[\f\r\\\x00]`)
+
+// EscapeNonspace works like Escape, except it does not escape spaces, tabs and
+// line feeds.
+func EscapeNonspace(s string) string {
+	replacements := map[string]string{
+		"\f":   `\f`,
+		"\r":   `\r`,
+		"\\":   `\\`,
+		"\x00": `\x00`,
+	}
+	return escapeNonspaceRe.ReplaceAllStringFunc(s, func(match string) string {
+		if escaped, ok := replacements[match]; ok {
+			return escaped
+		}
+		return match // this shouldn't happen
+	})
+}
+
+// unescapeRe matches one complete escape sequence: a backslash followed by
+// one of b, t, n, v, f, r, space or backslash, or by x, u or U with exactly
+// 2, 4 or 8 hexadecimal digits.
+var unescapeRe = regexp.MustCompile(`\\(?:[btnvfr \\]|x[0-9a-fA-F]{2}|u[0-9a-fA-F]{4}|U[0-9a-fA-F]{8})`)
+
+// Unescape resolves escape sequences in simple text.
+//
+// The \x, \u and \U sequences are replaced by the UTF-8 encoding of the code
+// point they name; values that are not valid code points become U+FFFD.
+// Anything that is not a complete escape sequence, including a lone trailing
+// backslash, is left unchanged.
+func Unescape(s string) string {
+	return unescapeRe.ReplaceAllStringFunc(s, func(match string) string {
+		switch match[1] {
+		case 'b':
+			return "\b"
+		case 't':
+			return "\t"
+		case 'n':
+			return "\n"
+		case 'v':
+			return "\v"
+		case 'f':
+			return "\f"
+		case 'r':
+			return "\r"
+		case ' ':
+			return " "
+		case '\\':
+			return "\\"
+		case 'x', 'u', 'U':
+			// The pattern guarantees 2, 4 or 8 hex digits, which always fit
+			// in 32 bits, so this cannot fail in practice.
+			n, err := strconv.ParseUint(match[2:], 16, 32)
+			if err != nil {
+				return match
+			}
+			// Convert through rune: a direct integer-to-string conversion is
+			// rejected by go vet.
+			return string(rune(n))
+		}
+		return match // this shouldn't happen
+	})
+}
+
+// splitRe matches one word: a run of characters that are neither whitespace
+// nor backslash, where a backslash together with the character after it counts
+// as part of the word. It also matches the empty string at the start of the
+// input.
+var splitRe = regexp.MustCompile(`((?:[^\t\n\f\r\\ ]|\\.?)+|^)`)
+
+// SplitUnescape splits the string s into words at non-escaped whitespace and
+// then resolves the simple text escape sequences in each word.
+func SplitUnescape(s string) []string {
+	words := splitRe.FindAllString(s, -1)
+	for i, word := range words {
+		words[i] = Unescape(word)
+	}
+	return words
+}
+
+// JoinEscape escapes every non-empty element of ss with Escape and joins the
+// results with single spaces. Empty elements are skipped.
+func JoinEscape(ss []string) string {
+	var escaped []string
+	for _, s := range ss {
+		if s == "" {
+			continue
+		}
+		escaped = append(escaped, Escape(s))
+	}
+	return strings.Join(escaped, " ")
+}
diff --git a/simpletext_test.go b/simpletext_test.go
new file mode 100644
index 0000000..8fdf754
--- /dev/null
+++ b/simpletext_test.go
@@ -0,0 +1,180 @@
+package cnm
+
+import "testing"
+
+var simpleEscapes = map[string]string{ // Escape input -> expected output, consumed by TestEscape
+ "": ``,
+ "ContNet": `ContNet`,
+ "\t": `\t`,
+ "\n": `\n`,
+ "\f": `\f`,
+ "\r": `\r`,
+ " ": `\ `,
+ "\\": `\\`,
+ "\x00": `\x00`,
+ " ": `\ \ \ \ `, // NOTE(review): this key and the five below render as one space here; whitespace looks collapsed in this copy — confirm against the repository
+ " ": `\ \ \ \ `,
+ " ": `\ \ \ `,
+ " ": `\ \ \ `,
+ " ": `\ \ `,
+ " ": `\ \ `,
+ "\b\v\\\x00\xff\u00ff\\xff": "\b\v\\\\\\x00\xff\u00ff\\\\xff", // \b, \v and non-ASCII bytes are expected to pass through unescaped
+}
+
+// TestEscape runs Escape over every input in simpleEscapes as a subtest and
+// reports any output that differs from the recorded expectation.
+func TestEscape(t *testing.T) {
+	for input, want := range simpleEscapes {
+		t.Run(input, func(t *testing.T) {
+			if got := Escape(input); got != want {
+				t.Errorf("Escape(%q) -> %q, expected %q", input, got, want)
+			}
+		})
+	}
+}
+
+var nonspaceEscapes = map[string]string{ // EscapeNonspace input -> expected output, consumed by TestEscapeNonspace
+ "": ``,
+ "ContNet": `ContNet`,
+ "\t": "\t", // tabs, line feeds and spaces are expected to stay as they are
+ "\n": "\n",
+ "\f": `\f`,
+ "\r": `\r`,
+ " ": ` `,
+ "\\": `\\`,
+ "\x00": `\x00`,
+ " ": ` `, // NOTE(review): this entry and the five below look identical here; whitespace looks collapsed in this copy — confirm against the repository
+ " ": ` `,
+ " ": ` `,
+ " ": ` `,
+ " ": ` `,
+ " ": ` `,
+ "\b\v\\\x00\xff\u00ff\\xff": "\b\v\\\\\\x00\xff\u00ff\\\\xff",
+}
+
+// TestEscapeNonspace runs EscapeNonspace over every input in nonspaceEscapes
+// as a subtest and reports any output that differs from the expectation.
+func TestEscapeNonspace(t *testing.T) {
+	for input, want := range nonspaceEscapes {
+		t.Run(input, func(t *testing.T) {
+			if got := EscapeNonspace(input); got != want {
+				t.Errorf("EscapeNonspace(%q) -> %q, expected %q", input, got, want)
+			}
+		})
+	}
+}
+
+var spaceEscapes = map[string]string{ // EscapeSpace input -> expected output, consumed by TestEscapeSpace
+ "": ``,
+ "ContNet": `ContNet`,
+ "\t": `\t`, // per these cases only tab, line feed and space are escaped
+ "\n": `\n`,
+ "\f": "\f",
+ "\r": "\r",
+ " ": `\ `,
+ "\\": `\`, // a backslash is expected to come back unescaped
+ "\x00": "\x00",
+ " ": `\ \ \ \ `, // NOTE(review): this key and the five below render as one space here; whitespace looks collapsed in this copy — confirm against the repository
+ " ": `\ \ \ \ `,
+ " ": `\ \ \ `,
+ " ": `\ \ \ `,
+ " ": `\ \ `,
+ " ": `\ \ `,
+ "\b\v\\\x00\xff\u00ff\\xff": "\b\v\\\x00\xff\u00ff\\xff",
+}
+
+// TestEscapeSpace runs EscapeSpace over every input in spaceEscapes as a
+// subtest and reports any output that differs from the expectation.
+func TestEscapeSpace(t *testing.T) {
+	for input, want := range spaceEscapes {
+		t.Run(input, func(t *testing.T) {
+			if got := EscapeSpace(input); got != want {
+				t.Errorf("EscapeSpace(%q) -> %q, expected %q", input, got, want)
+			}
+		})
+	}
+}
+
+var simpleUnescapes = map[string]string{ // Unescape input -> expected output, consumed by TestUnescape
+ ``: "",
+ `ContNet`: "ContNet",
+ `\b`: "\b",
+ `\t`: "\t",
+ `\n`: "\n",
+ `\v`: "\v",
+ `\f`: "\f",
+ `\r`: "\r",
+ `\ `: " ",
+ `\\`: "\\",
+ `\`: "\\", // a lone trailing backslash is expected to be kept
+ `\x00`: "\x00",
+ `a\nb\ c\rd\be\\f`: "a\nb c\rd\be\\f",
+ `\n\n\n`: "\n\n\n",
+ `\x00\xff\n\x123`: "\x00\u00ff\n\x123", // \xff is expected to become U+00FF, not the raw byte 0xff
+ " \b\\b\t\n\v\f\r\\x00\x00\\\\xff": " \b\b\t\n\v\f\r\x00\x00\\xff",
+ `\xAA\xAa\xaA\xaa`: "\u00aa\u00aa\u00aa\u00aa", // hex digits are accepted in either case
+ `\x00\xfg`: "\x00\\xfg",
+ `\\\\\\`: "\\\\\\",
+ "\b5Ὂg̀9!\\n℃ᾭG": "\b5Ὂg̀9!\n℃ᾭG",
+ "\xff\\x00\xee\xaa\xee": "\xff\x00\xee\xaa\xee", // bytes that are not valid UTF-8 are expected to pass through
+ "\\x00\x10\\ \x30\x40": "\x00\x10\x20\x30\x40",
+ "\x10\x50\x90\xe0": "\x10\x50\x90\xe0",
+ `Hello,\ 世界`: "Hello, 世界",
+ "\xed\x9f\xbf": "\xed\x9f\xbf",
+ "\xee\x80\x80": "\xee\x80\x80",
+ "\xef\xbf\xbd": "\xef\xbf\xbd",
+ "\x80\x80\x80\x80": "\x80\x80\x80\x80",
+ `\ \ \ `: " ", // NOTE(review): three escaped spaces map to what renders as one space here; whitespace looks collapsed in this copy — confirm against the repository
+ `\uffff\u0000\u0123\ufedc\ufffe`: "\uffff\u0000\u0123\ufedc\ufffe",
+ `\Uffff0000\U0003fedc\U0010ffff\U00110000`: "\ufffd\U0003fedc\U0010ffff\ufffd", // values past U+10FFFF are expected to become U+FFFD
+ `\x0x\u012x\U0123456x`: "\\x0x\\u012x\\U0123456x", // sequences with a non-hex digit are expected to stay untouched
+ `\U0123456`: "\\U0123456", // sequences that are too short are expected to stay untouched
+ `\u012`: "\\u012",
+ `\x0`: "\\x0",
+ `\x\u\U\a\z\0\-`: "\\x\\u\\U\\a\\z\\0\\-", // unknown escapes are expected to stay untouched
+}
+
+// TestUnescape runs Unescape over every input in simpleUnescapes as a subtest
+// and reports any output that differs from the expectation.
+func TestUnescape(t *testing.T) {
+	for input, want := range simpleUnescapes {
+		t.Run(input, func(t *testing.T) {
+			if got := Unescape(input); got != want {
+				t.Errorf("Unescape(%q) -> %q, expected %q", input, got, want)
+			}
+		})
+	}
+}
+
+var simpleTexts = map[string]string{ // ParseSimpleText input -> expected output, consumed by TestParseSimpleText
+ "foo": "foo",
+ "\n": "",
+ "\n\r \t\v\f": "\v", // of these characters only \v is expected to survive
+ " ": "",
+ `\ `: " ", // NOTE(review): many keys below render identically here; whitespace looks collapsed in this copy — confirm against the repository
+ ` \ `: " ",
+ `\ `: " ",
+ `\ \ `: " ",
+ ` \`: "\\",
+ `\`: "\\",
+ ` \ `: " ",
+ ` `: "",
+ `\ \ `: " ",
+ ` \ `: " ",
+ " qwe asd ": "qwe asd",
+ "\\ qwe\nasd\n\nzxc\\n123\n": " qwe asd zxc\n123", // raw line feeds are expected to act as separators, an escaped \n as a real line feed
+ `\ \ \ \ \ `: " ",
+ ` \ \ \ `: " ",
+ ` \\ `: "\\",
+ `\ \\ `: " \\",
+ ` \\\ `: "\\ ",
+ ` \ \\\ `: " \\ ",
+ `\ \\ \ `: " \\ ",
+}
+
+// TestParseSimpleText runs ParseSimpleText over every input in simpleTexts as
+// a subtest and reports any output that differs from the expectation.
+func TestParseSimpleText(t *testing.T) {
+	for input, want := range simpleTexts {
+		t.Run(input, func(t *testing.T) {
+			if got := ParseSimpleText(input); got != want {
+				t.Errorf("ParseSimpleText(%q) -> %q, expected %q", input, got, want)
+			}
+		})
+	}
+}
diff --git a/token.go b/token.go
new file mode 100644
index 0000000..a6b08b9
--- /dev/null
+++ b/token.go
@@ -0,0 +1,58 @@
+package cnm
+
+// Token represents a parsed line in a CNM document; *TokenLine and pointers to the types embedding it implement it.
+type Token interface {
+ Indent() int // indentation of the parsed line
+ Raw() string // original unparsed line
+ Line() int // line number in the document, starting from 1
+}
+
+// TokenLine represents an arbitrary CNM line. It stores the three values that
+// the Token interface exposes.
+type TokenLine struct {
+	Indentation int    // returned by Indent
+	RawLine     string // returned by Raw
+	LineNo      int    // returned by Line
+}
+
+// Indent returns the indentation of the parsed line.
+func (tl *TokenLine) Indent() int {
+	return tl.Indentation
+}
+
+// Raw returns the original unparsed line.
+func (tl *TokenLine) Raw() string {
+	return tl.RawLine
+}
+
+// Line returns the line number in the document, starting from 1.
+func (tl *TokenLine) Line() int {
+	return tl.LineNo
+}
+
+// TokenEmptyLine represents an empty line.
+//
+// A line is empty as long as it contains up to as many tab characters as the
+// line's indentation and nothing else.
+type TokenEmptyLine struct {
+ TokenLine // embedded: supplies Indentation, RawLine and LineNo plus the Indent, Raw and Line methods
+}
+
+// TokenBlock represents a block header line.
+type TokenBlock struct {
+ TokenLine // embedded: supplies Indentation, RawLine and LineNo plus the Indent, Raw and Line methods
+ // Parent is the parent block (presumably nil for a top-level block — TODO confirm).
+ Parent *TokenBlock
+ // Name is the block name.
+ Name string
+ // Args are the block arguments, split by whitespace and then parsed as
+ // simple text.
+ Args []string
+}
+
+// TokenSimpleText represents a line of simple text.
+type TokenSimpleText struct {
+ TokenLine // embedded: supplies Indentation, RawLine and LineNo plus the Indent, Raw and Line methods
+ // Text is the line contents parsed as simple text.
+ Text string
+}
+
+// TokenRawText represents a non-empty line with unparsed contents.
+type TokenRawText struct {
+ TokenLine // embedded: supplies Indentation, RawLine and LineNo plus the Indent, Raw and Line methods
+ // Text is the raw contents of the line with the indentation removed.
+ Text string
+}
diff --git a/write_test.go b/write_test.go
new file mode 100644
index 0000000..fc13459
--- /dev/null
+++ b/write_test.go
@@ -0,0 +1,218 @@
+package cnm
+
+import (
+ "bytes"
+ "testing"
+)
+
+var writeTests = map[string]*Document{ // expected Write output -> the Document that should produce it, consumed by TestWrite
+ "": &Document{}, // an empty document is expected to write nothing
+
+ "title\n\tfoo bar\n": &Document{
+ Title: "foo bar",
+ },
+
+ "title\n\tfoo bar baz\n" +
+ "links\n\tqwe asd\n\tzxc 123\n\tfoo\n" +
+ "site\n\tfoo\n\t\tbar\n\t\tbaz/quux\n\t\t\t123\n\ttest\n": &Document{
+ Title: "foo bar baz",
+ Links: []Link{
+ Link{"qwe", "asd", ""},
+ Link{"zxc", "123", ""},
+ Link{"foo", "", ""},
+ },
+ Site: Site{
+ Children: []Site{
+ Site{
+ Path: "foo",
+ Children: []Site{
+ Site{Path: "bar"},
+ Site{
+ Path: "baz/quux",
+ Children: []Site{
+ Site{Path: "123"},
+ },
+ },
+ },
+ },
+ Site{Path: "test"},
+ },
+ },
+ },
+
+ `title
+ Test document
+content
+ section Test section
+ text
+ This is \n just a
+ text pre
+ t e
+ s t
+
+ raw text/plain
+ of various \n features
+ section of the
+ table
+ header
+ text
+ Column 1
+ text
+ Column 2
+ row
+ text
+ CNM
+ text
+ document
+
+ format
+ row
+ section
+ list
+ text
+ ipsum
+ list ordered
+ list unordered
+ text
+ dolor
+
+ sit amet
+ embed text/cnm cnp://example.com/
+ thing whatever
+`: &Document{ // NOTE(review): the expected text above shows no tab indentation in this copy; whitespace looks collapsed — confirm against the repository
+ Title: "Test document",
+ Content: &ContentBlock{
+ name: "content",
+ args: nil,
+ children: []Block{
+ &SectionBlock{ContentBlock{
+ name: "section",
+ args: []string{"Test", "section"},
+ children: []Block{
+ &TextBlock{
+ Contents: TextPlainContents{[]string{
+ "This is \n just a",
+ }},
+ },
+ &TextBlock{
+ Format: "pre",
+ Contents: TextPreContents{
+ " t e \n s t \n\t",
+ },
+ },
+ &RawBlock{
+ Syntax: "text/plain",
+ Contents: "of various \\n features",
+ },
+ &SectionBlock{ContentBlock{
+ name: "section",
+ args: []string{"of the"},
+ children: []Block{
+ &TableBlock{[]Block{
+ &HeaderBlock{ContentBlock{
+ name: "header",
+ args: []string{},
+ children: []Block{
+ &TextBlock{
+ Contents: TextPlainContents{[]string{
+ "Column 1",
+ }},
+ },
+ &TextBlock{
+ Contents: TextPlainContents{[]string{
+ "Column 2",
+ }},
+ },
+ },
+ }},
+ &RowBlock{ContentBlock{
+ name: "row",
+ args: []string{},
+ children: []Block{
+ &TextBlock{
+ Contents: TextPlainContents{[]string{
+ "CNM",
+ }},
+ },
+ &TextBlock{
+ Contents: TextPlainContents{[]string{
+ "document",
+ "format",
+ }},
+ },
+ },
+ }},
+ &RowBlock{ContentBlock{
+ name: "row",
+ args: []string{""},
+ children: []Block{
+ &SectionBlock{ContentBlock{
+ name: "section",
+ args: []string{"", "", ""},
+ }},
+ &ListBlock{ContentBlock{
+ name: "list",
+ args: nil,
+ children: []Block{
+ &TextBlock{
+ Contents: TextPlainContents{[]string{
+ "ipsum",
+ }},
+ },
+ &ListBlock{ContentBlock{
+ name: "list",
+ args: []string{"ordered"},
+ children: []Block{
+ &ListBlock{ContentBlock{
+ name: "list",
+ args: []string{"unordered"},
+ children: []Block{
+ &TextBlock{
+ Contents: TextPlainContents{[]string{
+ "dolor",
+ "sit amet",
+ }},
+ },
+ },
+ }},
+ },
+ }},
+ },
+ }},
+ },
+ }},
+ }},
+ },
+ }},
+ },
+ }},
+ &EmbedBlock{
+ Type: "text/cnm",
+ URL: "cnp://example.com/",
+ Description: "thing whatever",
+ },
+ },
+ },
+ },
+}
+
+// TestWrite serializes every Document in writeTests with Write and compares
+// the produced text with the map key. Both texts are logged so a mismatch can
+// be inspected in the test output.
+func TestWrite(t *testing.T) {
+	for want, doc := range writeTests {
+		t.Run(want, func(t *testing.T) {
+			var out bytes.Buffer
+			if err := doc.Write(&out); err != nil {
+				t.Fatalf("Write error: %v", err)
+			}
+			got := out.String()
+			t.Log("====================")
+			t.Log("expected:\n" + want)
+			t.Log("--------------------")
+			t.Log(" got:\n" + got)
+			t.Log("====================")
+			if got != want {
+				t.Fatal("Write: output did not match expected document")
+			}
+		})
+	}
+}