Initial commitv0.1.0

author: clsr <clsr@clsr.net> 2017-08-18 13:45:49 +0200
committer: clsr <clsr@clsr.net> 2017-08-18 13:45:49 +0200
commit: 26248678aafc2f8e277d4bdafc116f2b349b02c5 (patch)
tree: 15f82488edb8c05aae756443284731875f36737c
download: cnm-go-26248678aafc2f8e277d4bdafc116f2b349b02c5.tar.gz
cnm-go-26248678aafc2f8e277d4bdafc116f2b349b02c5.zip
10 files changed, 3412 insertions, 0 deletions
diff --git a/cnmfmt/cnmfmt.go b/cnmfmt/cnmfmt.go
new file mode 100644
index 0000000..cb8dc64
--- /dev/null
+++ b/cnmfmt/cnmfmt.go
@@ -0,0 +1,525 @@
+// Package cnmfmt provides parsing and composition for CNMfmt formatting.
+package cnmfmt // import "contnet.org/lib/cnm-go/cnmfmt"
+
+import (
+	"bytes"
+	"io"
+	"strings"
+
+	"contnet.org/lib/cnm-go"
+)
+
+func init() { // makes parseTextFmt the content parser for text blocks whose format is "fmt"
+	cnm.RegisterTextContentParser("fmt", parseTextFmt)
+}
+
+// Text is one paragraph of CNMfmt text, held as an ordered list of spans.
+type Text struct {
+	// Spans are the consecutive runs of text, each with its own Format.
+	Spans []Span
+}
+
+// ParseParagraph parses a single CNMfmt text paragraph s into formatted spans.
+func ParseParagraph(s string) Text {
+	s = cnm.CollapseWhitespace(s)
+
+	t := Text{}
+	var buf bytes.Buffer // raw text (or link URL) collected since the last span boundary
+	format := Format{}
+	last := rune(-1) // pending tag rune or backslash not yet written to buf; -1 if none
+	url := false     // toggled by handleTag on every "@@"
+
+	for _, r := range s {
+		if url && format.Link == "" { // inside a link whose URL has not been read yet
+			if handleURL(r, &last, &format, &buf) {
+				continue
+			}
+		}
+
+		switch r {
+		case '*', '/', '_', '`', '@':
+			handleTag(r, &last, &t, &format, &buf, &url)
+
+		case '\\':
+			if last == '\\' {
+				buf.WriteString("\\\\") // keep the escaped backslash escaped; spans are unescaped at the end
+				last = -1
+			} else {
+				if last >= 0 {
+					buf.WriteRune(last)
+				}
+				last = '\\'
+			}
+
+		default:
+			if last >= 0 {
+				buf.WriteRune(last) // a lone tag rune or backslash is kept as written
+			}
+			buf.WriteRune(r)
+			last = -1
+		}
+	}
+
+	if url && format.Link == "" { // input ended while still reading a link URL
+		if last >= 0 {
+			buf.WriteRune(last)
+		}
+		format.Link = Unescape(buf.String())
+		buf.Reset()
+	} else if last >= 0 {
+		buf.WriteRune(last)
+	}
+	last = -1
+	handleTag(-1, &last, &t, &format, &buf, &url) // last == r == -1: flushes buf as the final span
+
+	t.trimUnescape()
+
+	return t
+}
+
+// trimUnescape drops every span that has neither text nor a link target and
+// resolves the escape sequences in the text of the spans that remain.
+//
+// The emptiness check looks at the raw, still escaped text. The surviving
+// spans keep their relative order, and t.Spans is nil when none survive.
+func (t *Text) trimUnescape() {
+	var spans []Span
+	for _, span := range t.Spans {
+		if span.Text == "" && span.Format.Link == "" {
+			continue
+		}
+		span.Text = Unescape(span.Text)
+		spans = append(spans, span)
+	}
+	t.Spans = spans
+}
+
+// appendSpan adds a span with the given format and text to the end of t.
+// A span with neither text nor a link target is silently discarded.
+func (t *Text) appendSpan(format Format, txt string) {
+	if txt == "" && format.Link == "" {
+		return
+	}
+	t.Spans = append(t.Spans, Span{Format: format, Text: txt})
+}
+
+// handleURL consumes one rune r while the URL part of a link is being read,
+// i.e. after an opening "@@" and before format.Link has been set.
+//
+// The raw (still escaped) URL is collected in buf. last holds a backslash or
+// '@' that has been read but not yet stored, or -1 when nothing is pending.
+// A space after at least one URL rune ends the URL and stores it, unescaped,
+// in format.Link; spaces before the URL are skipped.
+//
+// It returns true when r has been fully consumed. It returns false when r is
+// the second '@' of a closing "@@": format.Link is then set and the caller
+// must still handle r as a tag rune.
+func handleURL(r rune, last *rune, format *Format, buf *bytes.Buffer) bool {
+	switch {
+	case r == '@' && *last == '@': // "@@": the link ends without any text
+		format.Link = Unescape(buf.String())
+		buf.Reset()
+		return false
+
+	case *last == '\\': // escaped rune: keep the escape for Unescape
+		buf.WriteByte('\\')
+		buf.WriteRune(r)
+		*last = -1
+		return true
+	}
+
+	// A lone '@' that was not followed by a second '@' belongs to the URL.
+	// Store it before handling r so that it keeps its position instead of
+	// being dropped or moved behind the runes that follow it.
+	if *last == '@' {
+		buf.WriteByte('@')
+		*last = -1
+	}
+
+	switch {
+	case r == '\\' || r == '@': // may start an escape or a closing "@@"
+		*last = r
+	case r != ' ': // ordinary URL rune
+		buf.WriteRune(r)
+	case buf.Len() > 0: // space after the URL: the link text follows
+		format.Link = Unescape(buf.String())
+		buf.Reset()
+	} // otherwise: space before the URL, ignored
+	return true
+}
+
+// handleTag processes a format tag rune r ('*', '/', '_', '`' or '@').
+//
+// last is the pending rune seen just before r (-1 if none):
+//   - after a backslash, r is written to buf as literal text;
+//   - if last equals r the tag is complete: the text in buf is appended to
+//     txt as a span with the current format, and the matching format flag
+//     is toggled ("@@" clears the link and toggles url);
+//   - otherwise a pending tag rune is written to buf as literal text and r
+//     becomes the new pending rune.
+func handleTag(r rune, last *rune, txt *Text, format *Format, buf *bytes.Buffer, url *bool) {
+	prev := *last
+	switch {
+	case prev == '\\':
+		buf.WriteRune(r)
+		*last = -1
+		return
+	case prev != r:
+		switch prev {
+		case '*', '/', '_', '`', '@':
+			buf.WriteRune(prev)
+		}
+		*last = r
+		return
+	}
+
+	// Doubled rune: close the current span and switch the format.
+	txt.appendSpan(*format, buf.String())
+	buf.Reset()
+	switch r {
+	case '*':
+		format.Bold = !format.Bold
+	case '/':
+		format.Italic = !format.Italic
+	case '_':
+		format.Underline = !format.Underline
+	case '`':
+		format.Monospace = !format.Monospace
+	case '@':
+		format.Link = ""
+		*url = !*url
+	}
+	*last = -1
+}
+
+// WriteIndent writes the paragraph as a single line of CNMfmt markup,
+// indented by n tabs.
+//
+// Between consecutive spans only the tags whose state differs are emitted,
+// in the order chosen by tagOrder.
+func (t Text) WriteIndent(w io.Writer, n int) error {
+	var (
+		open  [5]byte // tags currently switched on: bold, italic, underline, monospace, link
+		nOpen int     // number of valid entries in open
+		cur   Format  // format in effect before the span being written
+		out   bytes.Buffer
+	)
+	for _, span := range EscapeSpans(t.Spans) {
+		toggles := tagOrder(open[:nOpen], cur, span.Format)
+		for _, tag := range toggles {
+			switch tag {
+			case '*':
+				cur.Bold = !cur.Bold
+				out.WriteString("**")
+			case '/':
+				cur.Italic = !cur.Italic
+				out.WriteString("//")
+			case '_':
+				cur.Underline = !cur.Underline
+				out.WriteString("__")
+			case '`':
+				cur.Monospace = !cur.Monospace
+				out.WriteString("``")
+			case '@':
+				if cur.Link != "" { // close the previous link
+					out.WriteString("@@")
+				}
+				if span.Format.Link != "" { // open the new one
+					out.WriteString("@@")
+					out.WriteString(cnm.Escape(span.Format.Link))
+					if span.Text != "" {
+						out.WriteString(" ")
+					}
+				}
+			}
+		}
+		out.WriteString(span.Text)
+		nOpen = cleanupTags(open[:], toggles, span.Format)
+		cur = span.Format
+	}
+	return writeIndent(w, out.String(), n)
+}
+
+// tagOrder returns the tag runes that must be emitted to get from format old
+// to format new.
+//
+// state lists the tags that are currently on, oldest first. Tags found in
+// state are toggled first, most recently opened first; the remaining changed
+// tags follow in the fixed order bold, italic, underline, monospace, link.
+func tagOrder(state []byte, old, new Format) []byte {
+	const tags = "*/_`@"
+
+	// pending[k] is true while tags[k] still has to be toggled.
+	pending := [len(tags)]bool{
+		old.Bold != new.Bold,
+		old.Italic != new.Italic,
+		old.Underline != new.Underline,
+		old.Monospace != new.Monospace,
+		old.Link != new.Link,
+	}
+
+	order := make([]byte, 0, len(tags))
+	for i := len(state) - 1; i >= 0; i-- {
+		k := strings.IndexByte(tags, state[i])
+		if k < 0 || !pending[k] {
+			continue
+		}
+		order = append(order, tags[k])
+		pending[k] = false
+	}
+	for k := range pending {
+		if pending[k] {
+			order = append(order, tags[k])
+		}
+	}
+	return order
+}
+
+// cleanupTags rebuilds state, the list of tags that are switched on, after
+// the tags in order have been emitted and format has become the current
+// format. It returns the number of valid entries at the start of state.
+//
+// The candidates are the first five entries of state followed by order. A
+// tag is kept only if format still has it switched on, and each tag is kept
+// at most once, at the position of its last occurrence. Unused trailing
+// entries of state are zeroed.
+//
+// Without the de-duplication and zeroing, left-over or repeated entries
+// (every change from one link to another adds another '@') accumulated
+// across calls until more than len(state) entries had to be stored, which
+// panicked with an index out of range.
+func cleanupTags(state []byte, order []byte, format Format) int {
+	var merged [10]byte
+	copy(merged[:5], state)
+	copy(merged[5:], order)
+
+	// Walk backwards so that the last occurrence of each tag is the one kept.
+	var seen [5]bool
+	for i := len(merged) - 1; i >= 0; i-- {
+		k, on := -1, false
+		switch merged[i] {
+		case '*':
+			k, on = 0, format.Bold
+		case '/':
+			k, on = 1, format.Italic
+		case '_':
+			k, on = 2, format.Underline
+		case '`':
+			k, on = 3, format.Monospace
+		case '@':
+			k, on = 4, format.Link != ""
+		}
+		if k < 0 || !on || seen[k] {
+			merged[i] = 0
+			continue
+		}
+		seen[k] = true
+	}
+
+	si := 0
+	for _, tag := range merged {
+		if tag != 0 && si < len(state) {
+			state[si] = tag
+			si++
+		}
+	}
+	for i := si; i < len(state); i++ {
+		state[i] = 0
+	}
+	return si
+}
+
+// Span is a run of text that shares a single Format.
+type Span struct {
+	// Format is the formatting applied to Text.
+	Format Format
+
+	// Text is the text content of the span; it may be empty for a link.
+	Text string
+}
+
+// Format is the set of CNMfmt formatting options in effect for a span.
+type Format struct {
+	// Bold text (toggled by "**").
+	Bold bool
+
+	// Italic text (toggled by "//").
+	Italic bool
+
+	// Underlined text (toggled by "__").
+	Underline bool
+
+	// Monospaced text (toggled by "``").
+	Monospace bool
+
+	// Hyperlink URL; the span is a link if it is non-empty.
+	Link string
+}
+
+// Escape escapes s for use as literal CNMfmt text: CNM text escaping is
+// applied first, then the CNMfmt format toggle characters are escaped.
+func Escape(s string) string {
+	escaped := cnm.Escape(s)
+	return EscapeFmt(escaped)
+}
+
+// EscapeSpans returns a copy of spans in which the text of every span has
+// been escaped with Escape.
+//
+// While a span is escaped, an "x" placeholder is attached to the side that
+// borders a neighbouring span whose text does not have a space on that side,
+// and removed again afterwards, so that a space at the start or end of the
+// span is not at the edge of the string passed to Escape.
+func EscapeSpans(spans []Span) []Span {
+	// XXX: this is an ugly solution
+	out := make([]Span, len(spans))
+	for i, span := range spans {
+		guardEnd := false
+		if i+1 < len(spans) {
+			next := spans[i+1].Text
+			guardEnd = len(next) > 0 && next[0] != ' '
+		}
+		guardStart := false
+		if i > 0 {
+			prev := spans[i-1].Text
+			guardStart = len(prev) > 0 && prev[len(prev)-1] != ' '
+		}
+
+		text := span.Text
+		if guardEnd {
+			text += "x"
+		}
+		if guardStart {
+			text = "x" + text
+		}
+		text = Escape(text)
+		if guardStart {
+			text = text[1:]
+		}
+		if guardEnd {
+			text = text[:len(text)-1]
+		}
+
+		span.Text = text
+		out[i] = span
+	}
+	return out
+}
+
+var escapeReplacer = strings.NewReplacer( // puts a backslash in front of each CNMfmt toggle character
+	`*`, `\*`,
+	`/`, `\/`,
+	`_`, `\_`,
+	"`", "\\`",
+	`@`, `\@`,
+)
+
+// EscapeFmt escapes only the CNMfmt format toggle characters (* / _ ` @) in s.
+func EscapeFmt(s string) string {
+	return escapeReplacer.Replace(s)
+}
+
+// Unescape resolves the escape sequences in s: CNMfmt escapes are resolved
+// first, then the result is passed through cnm.Unescape.
+func Unescape(s string) string {
+	s = UnescapeFmt(s)
+	return cnm.Unescape(s)
+}
+
+var unescapeReplacer = strings.NewReplacer( // `\\` maps to itself so an escaped backslash is consumed as one unit
+	`\\`, `\\`,
+	`\*`, `*`,
+	`\/`, `/`,
+	`\_`, `_`,
+	"\\`", "`",
+	`\@`, `@`,
+)
+
+// UnescapeFmt resolves only CNMfmt escape sequences in s; `\\` is left as is.
+func UnescapeFmt(s string) string {
+	return unescapeReplacer.Replace(s)
+}
+
+// TextFmtContents represents the contents of a CNM `text fmt` block.
+type TextFmtContents struct {
+	Paragraphs []Text // the CNMfmt paragraphs of the block
+}
+
+// NewTextFmtBlock returns a text block with format "fmt" whose contents are
+// the given CNMfmt paragraphs.
+func NewTextFmtBlock(paragraphs []Text) *cnm.TextBlock {
+	contents := TextFmtContents{Paragraphs: paragraphs}
+	return cnm.NewTextBlock("fmt", contents)
+}
+
+// WriteIndent writes every paragraph indented by n tabs, with one empty line
+// between consecutive paragraphs. It stops at the first write error.
+func (tf TextFmtContents) WriteIndent(w io.Writer, n int) error {
+	for i := range tf.Paragraphs {
+		if i > 0 {
+			// paragraph separator
+			if err := writeIndent(w, "", 0); err != nil {
+				return err
+			}
+		}
+		if err := tf.Paragraphs[i].WriteIndent(w, n); err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+// Parse splits paragraphs into CNMfmt paragraphs and parses each of them.
+//
+// The input is split on "\n". Consecutive lines that contain something other
+// than whitespace are joined with a single space and form one paragraph; any
+// empty or whitespace-only line ends the current paragraph. Separator lines
+// never produce a paragraph of their own, so leading, trailing or repeated
+// blank lines do not add empty Text values to the result.
+func Parse(paragraphs string) []Text {
+	var txt []Text
+	var paragraph []string
+
+	// flush parses the lines collected so far, if any, as one paragraph.
+	flush := func() {
+		if len(paragraph) == 0 {
+			return
+		}
+		txt = append(txt, ParseParagraph(strings.Join(paragraph, " ")))
+		paragraph = nil
+	}
+
+	for _, line := range strings.Split(paragraphs, "\n") {
+		if strings.Trim(line, "\n\r\t\f ") == "" {
+			flush()
+			continue
+		}
+		paragraph = append(paragraph, line)
+	}
+	flush()
+
+	return txt
+}
+
+// writeIndent writes s followed by a newline to w, preceded by depth tabs.
+//
+// An empty s is written as a bare newline without indentation. A depth of
+// zero or less writes no indentation; a negative depth used to panic.
+func writeIndent(w io.Writer, s string, depth int) error {
+	// Pre-built indentation for the common shallow depths.
+	const tabs = "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t"
+
+	var line string
+	switch {
+	case s == "":
+		line = "\n"
+	case depth <= 0:
+		line = s + "\n"
+	case depth <= len(tabs):
+		line = tabs[:depth] + s + "\n"
+	default:
+		line = strings.Repeat("\t", depth) + s + "\n"
+	}
+	_, err := io.WriteString(w, line)
+	return err
+}
+
+// parseTextFmt parses the lines of a `text fmt` block into TextFmtContents.
+//
+// It reads raw text tokens from p until a non-empty line that is not
+// indented deeper than block is reached, or until p.Next returns an error,
+// which is returned together with the contents parsed so far. Consecutive
+// text lines are joined with a single space into one paragraph. An empty
+// line or a whitespace-only text line ends the current paragraph; such a
+// line never produces an empty paragraph of its own.
+func parseTextFmt(p *cnm.Parser, block *cnm.TokenBlock) (cnm.TextContents, error) {
+	txt := TextFmtContents{}
+	var paragraph []string
+
+	// flush parses the lines collected so far, if any, as one paragraph.
+	flush := func() {
+		if len(paragraph) == 0 {
+			return
+		}
+		txt.Paragraphs = append(txt.Paragraphs, ParseParagraph(strings.Join(paragraph, " ")))
+		paragraph = nil
+	}
+
+	var err error
+	for err == nil {
+		if !p.Empty() && p.Indent() <= block.Indent() {
+			break
+		}
+
+		switch token := p.RawText().(type) {
+		case *cnm.TokenRawText:
+			if strings.Trim(token.Text, "\n\r\t\f ") == "" {
+				flush()
+			} else {
+				paragraph = append(paragraph, token.Text)
+			}
+		case *cnm.TokenEmptyLine:
+			flush()
+		}
+		err = p.Next()
+	}
+	flush()
+	return txt, err
+}
diff --git a/cnmfmt/cnmfmt_test.go b/cnmfmt/cnmfmt_test.go
new file mode 100644
index 0000000..89a40a9
--- /dev/null
+++ b/cnmfmt/cnmfmt_test.go
@@ -0,0 +1,457 @@
+package cnmfmt
+
+import (
+	"bytes"
+	"io"
+	"strings"
+	"testing"
+
+	"contnet.org/lib/cnm-go"
+)
+
+var parseTests = map[string]Text{
+	"\\nfoo\nbar\\": Text{[]Span{
+		Span{Format{}, "\nfoo bar\\"},
+	}},
+	"**foo": Text{[]Span{
+		Span{Format{Bold: true}, "foo"},
+	}},
+	"//foo": Text{[]Span{
+		Span{Format{Italic: true}, "foo"},
+	}},
+	"__foo": Text{[]Span{
+		Span{Format{Underline: true}, "foo"},
+	}},
+	"``foo": Text{[]Span{
+		Span{Format{Monospace: true}, "foo"},
+	}},
+	"foo*bar": Text{[]Span{
+		Span{Format{}, "foo*bar"},
+	}},
+	"foo*": Text{[]Span{
+		Span{Format{}, "foo*"},
+	}},
+	"foo**": Text{[]Span{
+		Span{Format{}, "foo"},
+	}},
+	"foo***": Text{[]Span{
+		Span{Format{}, "foo"},
+		Span{Format{Bold: true}, "*"},
+	}},
+	"foo****": Text{[]Span{
+		Span{Format{}, "foo"},
+	}},
+	"*foo": Text{[]Span{
+		Span{Format{}, "*foo"},
+	}},
+	"****foo": Text{[]Span{
+		Span{Format{}, "foo"},
+	}},
+	"******foo": Text{[]Span{
+		Span{Format{Bold: true}, "foo"},
+	}},
+	"foo ** bar": Text{[]Span{
+		Span{Format{}, "foo "},
+		Span{Format{Bold: true}, " bar"},
+	}},
+	"foo** bar": Text{[]Span{
+		Span{Format{}, "foo"},
+		Span{Format{Bold: true}, " bar"},
+	}},
+	"foo **bar": Text{[]Span{
+		Span{Format{}, "foo "},
+		Span{Format{Bold: true}, "bar"},
+	}},
+	"foo ** bar ** baz": Text{[]Span{
+		Span{Format{}, "foo "},
+		Span{Format{Bold: true}, " bar "},
+		Span{Format{}, " baz"},
+	}},
+	"foo ** bar** baz": Text{[]Span{
+		Span{Format{}, "foo "},
+		Span{Format{Bold: true}, " bar"},
+		Span{Format{}, " baz"},
+	}},
+	"**__**foo": Text{[]Span{
+		Span{Format{Underline: true}, "foo"},
+	}},
+	"***": Text{[]Span{
+		Span{Format{Bold: true}, "*"},
+	}},
+	"*\\**": Text{[]Span{
+		Span{Format{}, "***"},
+	}},
+	"\\*": Text{[]Span{
+		Span{Format{}, "*"},
+	}},
+	"\\*\\*": Text{[]Span{
+		Span{Format{}, "**"},
+	}},
+	"\\**": Text{[]Span{
+		Span{Format{}, "**"},
+	}},
+	"*\\*": Text{[]Span{
+		Span{Format{}, "**"},
+	}},
+	"\\": Text{[]Span{
+		Span{Format{}, "\\"},
+	}},
+	"\\\\": Text{[]Span{
+		Span{Format{}, "\\"},
+	}},
+	" ** // `` ": Text{[]Span{
+		Span{Format{Bold: true}, " "},
+		Span{Format{Bold: true, Italic: true}, " "},
+	}},
+	"**": Text{[]Span{}},
+	"**``__//foo": Text{[]Span{
+		Span{Format{Bold: true, Monospace: true, Underline: true, Italic: true}, "foo"},
+	}},
+	"**foo//bar**baz": Text{[]Span{
+		Span{Format{Bold: true}, "foo"},
+		Span{Format{Bold: true, Italic: true}, "bar"},
+		Span{Format{Italic: true}, "baz"},
+	}},
+	"@@foo": Text{[]Span{
+		Span{Format{Link: "foo"}, ""},
+	}},
+	"@@foo@@": Text{[]Span{
+		Span{Format{Link: "foo"}, ""},
+	}},
+	"@@foo bar@@": Text{[]Span{
+		Span{Format{Link: "foo"}, "bar"},
+	}},
+	"@@  foo": Text{[]Span{
+		Span{Format{Link: "foo"}, ""},
+	}},
+	"@@foo  ": Text{[]Span{
+		Span{Format{Link: "foo"}, ""},
+	}},
+	"@@foo\\": Text{[]Span{
+		Span{Format{Link: "foo\\"}, ""},
+	}},
+	"@@foo \\": Text{[]Span{
+		Span{Format{Link: "foo"}, "\\"},
+	}},
+	"@@foo \\\\": Text{[]Span{
+		Span{Format{Link: "foo"}, "\\"},
+	}},
+	"@@foo@": Text{[]Span{
+		Span{Format{Link: "foo@"}, ""},
+	}},
+	"@@foo\\@@": Text{[]Span{
+		Span{Format{Link: "foo@@"}, ""},
+	}},
+	"@@f\\\\o\\o\\n @": Text{[]Span{
+		Span{Format{Link: "f\\o\\o\n"}, "@"},
+	}},
+	"@@http://example.com foo **bar @@baz**": Text{[]Span{
+		Span{Format{Link: "http://example.com"}, "foo "},
+		Span{Format{Bold: true, Link: "http://example.com"}, "bar "},
+		Span{Format{Bold: true}, "baz"},
+	}},
+	"//@@http://example.com foo //bar @@": Text{[]Span{
+		Span{Format{Italic: true, Link: "http://example.com"}, "foo "},
+		Span{Format{Link: "http://example.com"}, "bar "},
+	}},
+	"__\\  asd \\ zxc\\ ": Text{[]Span{
+		Span{Format{Underline: true, Monospace: false}, "  asd  zxc "},
+	}},
+	"@@/ test/@@": Text{[]Span{
+		Span{Format{Link: "/"}, "test/"},
+	}},
+	"@@/ /test@@": Text{[]Span{
+		Span{Format{Link: "/"}, "/test"},
+	}},
+	"/": Text{[]Span{
+		Span{Format{}, "/"},
+	}},
+	"test/**": Text{[]Span{
+		Span{Format{}, "test/"},
+	}},
+	"//test/": Text{[]Span{
+		Span{Format{Italic: true}, "test/"},
+	}},
+	"/**test": Text{[]Span{
+		Span{Format{}, "/"},
+		Span{Format{Bold: true}, "test"},
+	}},
+}
+
+// TestParseParagraph checks ParseParagraph against every entry of parseTests.
+func TestParseParagraph(t *testing.T) {
+	for input, want := range parseTests {
+		t.Run(input, func(t *testing.T) {
+			if got := ParseParagraph(input); !textEqual(got, want) {
+				t.Errorf("ParseParagraph(%q):\nexpected: %#v\n     got: %#v", input, want, got)
+			}
+		})
+	}
+}
+
+// TestParse checks that Parse turns every parseTests input into exactly one
+// paragraph equal to the expected Text.
+func TestParse(t *testing.T) {
+	for input, want := range parseTests {
+		t.Run(input, func(t *testing.T) {
+			got := Parse(input)
+			if len(got) == 1 && textEqual(got[0], want) {
+				return
+			}
+			t.Errorf("Parse(%q):\nexpected: %#v\n     got: %#v", input, []Text{want}, got)
+		})
+	}
+}
+
+// textEqual reports whether a and b have the same spans in the same order.
+// A nil and an empty span list are considered equal.
+func textEqual(a, b Text) bool {
+	if len(a.Spans) != len(b.Spans) {
+		return false
+	}
+	for i, span := range a.Spans {
+		if span != b.Spans[i] {
+			return false
+		}
+	}
+	return true
+}
+
+var escapeTests = map[string]string{
+	"\n\r\t\v\x00":     "\\n\\r\\t\v\\x00",
+	"@@!!##__//__``**": "\\@\\@!!##\\_\\_\\/\\/\\_\\_\\`\\`\\*\\*",
+	`foo\@\@bar`:       `foo\\\@\\\@bar`,
+}
+
+// TestEscape checks Escape against every entry of escapeTests.
+func TestEscape(t *testing.T) {
+	for input, want := range escapeTests {
+		t.Run(input, func(t *testing.T) {
+			got := Escape(input)
+			if got != want {
+				t.Errorf("Escape(%q): expected %q, got %q", input, want, got)
+			}
+		})
+	}
+}
+
+var parseTextTests = map[string]TextFmtContents{
+	"foo  ** bar\nbaz\n\n\nquux ** ": TextFmtContents{[]Text{
+		Text{[]Span{
+			Span{Format{}, "foo "},
+			Span{Format{Bold: true}, " bar baz"},
+		}},
+		Text{[]Span{
+			Span{Format{}, "quux "},
+		}},
+	}},
+
+	"\n": TextFmtContents{},
+
+	"foo": TextFmtContents{[]Text{
+		Text{[]Span{
+			Span{Format{}, "foo"},
+		}},
+	}},
+
+	"\n\n": TextFmtContents{},
+
+	"foo\n\t\t\t\t\nbar": TextFmtContents{[]Text{
+		Text{[]Span{Span{Format{}, "foo"}}},
+		Text{[]Span{Span{Format{}, "bar"}}},
+	}},
+
+	"foo\n\t\t   \f\r\t\nbar": TextFmtContents{[]Text{
+		Text{[]Span{Span{Format{}, "foo"}}},
+		Text{[]Span{Span{Format{}, "bar"}}},
+	}},
+
+	`foo**bar\*\*baz\*\*quux**qweasd`: TextFmtContents{[]Text{Text{[]Span{
+		Span{Format{}, "foo"},
+		Span{Format{Bold: true}, "bar**baz**quux"},
+		Span{Format{}, "qweasd"},
+	}}}},
+}
+
+// TestParseTextFmt runs every parseTextTests input through both parseTextFmt
+// (driven by a cnm.Parser) and Parse, and compares the paragraphs with the
+// expected contents.
+func TestParseTextFmt(t *testing.T) {
+	for input, want := range parseTextTests {
+		t.Run(input, func(t *testing.T) {
+			parser := cnm.NewParser(strings.NewReader(input))
+			if err := parser.Next(); err != nil && err != io.EOF {
+				t.Fatalf("error parsing %q: %v", input, err)
+			}
+			content, err := parseTextFmt(parser, cnm.TopLevel)
+			if err != nil && err != io.EOF {
+				t.Fatalf("error parsing %q: %v", input, err)
+			}
+			got, ok := content.(TextFmtContents)
+			if !ok {
+				t.Fatalf("%q: expected type %T, got %T", input, want, content)
+			}
+			if !paragraphsEqual(want.Paragraphs, got.Paragraphs) {
+				t.Fatalf("%q:\nexpected: %#v\n     got: %#v", input, want, got)
+			}
+
+			parsed := Parse(input)
+			if !paragraphsEqual(parsed, want.Paragraphs) {
+				t.Fatalf("%q:\nexpected: %#v\n     got: %#v", input, want.Paragraphs, parsed)
+			}
+		})
+	}
+}
+
+// paragraphsEqual reports whether a and b contain equal paragraphs, compared
+// pairwise with textEqual.
+func paragraphsEqual(a, b []Text) bool {
+	if len(a) != len(b) {
+		return false
+	}
+	for i, paragraph := range a {
+		if !textEqual(paragraph, b[i]) {
+			return false
+		}
+	}
+	return true
+}
+
+var writeTests = map[string]TextFmtContents{
+	"": TextFmtContents{},
+
+	"foo\n": TextFmtContents{[]Text{
+		Text{[]Span{
+			Span{Format{}, "foo"},
+		}},
+	}},
+
+	"**foo\n": TextFmtContents{[]Text{
+		Text{[]Span{
+			Span{Format{Bold: true}, "foo"},
+		}},
+	}},
+
+	"foo **bar baz\n\nquux\n": TextFmtContents{[]Text{
+		Text{[]Span{
+			Span{Format{}, "foo "},
+			Span{Format{Bold: true}, "bar baz"},
+		}},
+		Text{[]Span{
+			Span{Format{}, "quux"},
+		}},
+	}},
+
+	"foo**bar``baz**quux\n\n" +
+		"\\ __qwe\\ __//\\  asd \\ //``zxc``**\\ \n\n" +
+		"//@@http://example.com exa//mple@@ @@href text@@// test\n": TextFmtContents{[]Text{
+		Text{[]Span{
+			Span{Format{}, "foo"},
+			Span{Format{Bold: true}, "bar"},
+			Span{Format{Bold: true, Monospace: true}, "baz"},
+			Span{Format{Monospace: true}, "quux"},
+		}},
+		Text{[]Span{
+			Span{Format{}, " "},
+			Span{Format{Underline: true}, "qwe "},
+			Span{Format{Italic: true}, "  asd  "},
+			Span{Format{Monospace: true}, "zxc"},
+			Span{Format{Bold: true}, " "},
+		}},
+		Text{[]Span{
+			Span{Format{Italic: true, Link: "http://example.com"}, "exa"},
+			Span{Format{Link: "http://example.com"}, "mple"},
+			Span{Format{}, " "},
+			Span{Format{Link: "href"}, "text"},
+			Span{Format{Italic: true}, " test"},
+		}},
+	}},
+
+	"foo**bar\\*\\*baz\\*\\*quux**qweasd\n": TextFmtContents{[]Text{Text{[]Span{
+		Span{Format{}, "foo"},
+		Span{Format{Bold: true}, "bar**baz**quux"},
+		Span{Format{}, "qweasd"},
+	}}}},
+}
+
+// TestWriteTextFmt checks that writing each writeTests value with
+// WriteIndent at depth 0 produces exactly the document used as its key.
+func TestWriteTextFmt(t *testing.T) {
+	for want, contents := range writeTests {
+		t.Run(want, func(t *testing.T) {
+			var buf bytes.Buffer
+			if err := contents.WriteIndent(&buf, 0); err != nil {
+				t.Fatalf("WriteIndent error: %v", err)
+			}
+			got := buf.String()
+			t.Log("expected:\n" + want)
+			t.Log("     got:\n" + got)
+			if want != got {
+				t.Fatalf("WriteIndent: output did not match expected document:\nexpected: %q\n     got: %q", want, got)
+			}
+		})
+	}
+}
+
+// TestWriteParseTextFmt writes each writeTests value, parses the output
+// again with parseTextFmt and expects the original paragraphs back.
+func TestWriteParseTextFmt(t *testing.T) {
+	for name, want := range writeTests {
+		t.Run(name, func(t *testing.T) {
+			var buf bytes.Buffer
+			if err := want.WriteIndent(&buf, 0); err != nil {
+				t.Fatalf("WriteIndent error: %v", err)
+			}
+
+			written := buf.String()
+			if written == "" {
+				written = "\n" // empty contents are parsed from a single empty line
+			}
+
+			parser := cnm.NewParser(strings.NewReader(written))
+			if err := parser.Next(); err != nil && err != io.EOF {
+				t.Fatalf("error parsing %q: %v", written, err)
+			}
+			content, err := parseTextFmt(parser, cnm.TopLevel)
+			if err != nil && err != io.EOF {
+				t.Fatalf("error parsing %q: %v", written, err)
+			}
+			got, ok := content.(TextFmtContents)
+			if !ok {
+				t.Fatalf("%q: expected type %T, got %T", written, want, content)
+			}
+			if !paragraphsEqual(want.Paragraphs, got.Paragraphs) {
+				t.Fatalf("%q:\nexpected: %#v\n     got: %#v", name, want, got)
+			}
+		})
+	}
+}
+
+// TestParseWriteTextFmt parses each writeTests document with parseTextFmt,
+// compares the result with the expected contents, writes it out again and
+// expects the original document back.
+func TestParseWriteTextFmt(t *testing.T) {
+	for doc, want := range writeTests {
+		t.Run(doc, func(t *testing.T) {
+			input := doc
+			if input == "" {
+				input = "\n" // empty contents are parsed from a single empty line
+			}
+
+			parser := cnm.NewParser(strings.NewReader(input))
+			if err := parser.Next(); err != nil && err != io.EOF {
+				t.Fatalf("error parsing %q: %v", doc, err)
+			}
+			content, err := parseTextFmt(parser, cnm.TopLevel)
+			if err != nil && err != io.EOF {
+				t.Fatalf("error parsing %q: %v", doc, err)
+			}
+			got, ok := content.(TextFmtContents)
+			if !ok {
+				t.Fatalf("%q: expected type %T, got %T", doc, want, content)
+			}
+			if !paragraphsEqual(got.Paragraphs, want.Paragraphs) {
+				t.Fatalf("%q: expected %#v, got %#v", doc, want, got)
+			}
+
+			var buf bytes.Buffer
+			if err = got.WriteIndent(&buf, 0); err != nil {
+				t.Fatalf("WriteIndent error: %v", err)
+			}
+
+			written := buf.String()
+			if doc != written {
+				t.Fatalf("%q:\nexpected: %#v\n     got: %#v", doc, doc, written)
+			}
+		})
+	}
+}
diff --git a/content.go b/content.go
new file mode 100644
index 0000000..971e9e3
--- /dev/null
+++ b/content.go
@@ -0,0 +1,610 @@
+package cnm
+
+import (
+	"io"
+	"strings"
+)
+
+func init() { // built-in text formats: no format and "plain" share a parser, "pre" has its own
+	RegisterTextContentParser("", parseTextPlain)
+	RegisterTextContentParser("plain", parseTextPlain)
+	RegisterTextContentParser("pre", parseTextPre)
+}
+
+// Block represents an arbitrary CNM block within the "content" top-level block.
+type Block interface {
+	// Name returns the name of the block.
+	Name() string
+
+	// Args returns the block arguments.
+	Args() []string
+	WriteIndent(w io.Writer, n int) error // writes the block to w; the implementations in this file indent it by n tabs
+}
+
+// ContentBlock represents a block that holds other content blocks.
+type ContentBlock struct {
+	name     string   // block name, written first on the header line
+	args     []string // block arguments, written after the name
+	children []Block  // child blocks, written one level deeper
+}
+
+// WriteIndent writes the block header (name followed by the arguments)
+// indented by n tabs, then every child block indented by n+1 tabs. It stops
+// at the first write error.
+func (cb *ContentBlock) WriteIndent(w io.Writer, n int) error {
+	header := make([]string, 0, 1+len(cb.args))
+	header = append(header, Escape(cb.name))
+	header = append(header, cb.args...)
+	if err := writeIndent(w, JoinEscape(header), n); err != nil {
+		return err
+	}
+	for i := range cb.children {
+		if err := cb.children[i].WriteIndent(w, n+1); err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+// NewContentBlock creates a new ContentBlock with a name and argument.
+func NewContentBlock(name string, args ...string) *ContentBlock {
+	var a []string
+	for _, arg := range args {
+		if arg != "" {
+			a = append(a, arg)
+		}
+	}
+	return &ContentBlock{name: name, args: a}
+}
+
+// Name returns the name the block was created with.
+func (cb *ContentBlock) Name() string {
+	return cb.name
+}
+
+// Args returns the block arguments; the returned slice is not a copy.
+func (cb *ContentBlock) Args() []string {
+	return cb.args
+}
+
+// Children returns the block's child blocks; the returned slice is not a copy.
+func (cb *ContentBlock) Children() []Block {
+	return cb.children
+}
+
+// AppendChild adds block to the end of the list of children.
+func (cb *ContentBlock) AppendChild(block Block) {
+	cb.children = append(cb.children, block)
+}
+
+func (cb *ContentBlock) parse(p *Parser, block *TokenBlock) (err error) { // parses the children of block into cb
+	for err == nil {
+		if !p.Empty() && p.Indent() <= block.Indent() { // a non-empty line at or above block's indent ends the block
+			break
+		}
+
+		token := p.Block()
+		if blk, ok := token.(*TokenBlock); ok {
+			var b Block
+			switch blk.Name { // each parse function advances the parser itself
+			case "section":
+				b, err = parseContentSection(p, blk)
+			case "text":
+				b, err = parseContentText(p, blk)
+			case "raw":
+				b, err = parseContentRaw(p, blk)
+			case "list":
+				b, err = parseContentList(p, blk)
+			case "table":
+				b, err = parseContentTable(p, blk)
+			case "embed":
+				b, err = parseContentEmbed(p, blk)
+			default:
+				err = parseUnknown(p, blk)
+			}
+			if b != nil { // the parsed block is kept even when err is non-nil
+				cb.AppendChild(b)
+			}
+		} else if err = p.Next(); err != nil { // not a block token: move on to the next line
+			break
+		}
+	}
+	return
+}
+
+// SectionBlock represents a "section" content block; its arguments form the title.
+type SectionBlock struct {
+	ContentBlock
+}
+
+// NewSectionBlock creates a new, empty "section" block whose only argument
+// is title (no argument at all if title is empty).
+func NewSectionBlock(title string) *SectionBlock {
+	base := NewContentBlock("section", title)
+	return &SectionBlock{ContentBlock: *base}
+}
+
+// Title returns the section's title: the block arguments joined by single spaces.
+func (b *SectionBlock) Title() string { return strings.Join(b.args, " ") }
+
+// parseContentSection parses a "section" block. The block arguments, joined
+// by spaces, become the title. The parser is advanced past the header line
+// and the child blocks are parsed into the section. The section is returned
+// even when an error occurs.
+func parseContentSection(p *Parser, block *TokenBlock) (*SectionBlock, error) {
+	title := strings.Join(block.Args, " ")
+	sec := NewSectionBlock(title)
+	err := p.Next()
+	if err == nil {
+		err = sec.parse(p, block)
+	}
+	return sec, err
+}
+
+// TextBlock represents a "text" content block.
+type TextBlock struct {
+	// Format is the text format (first word of the block argument); may be empty.
+	Format string
+	// Contents are the text contents, as produced by the parser for Format.
+	Contents TextContents
+}
+
+// NewTextBlock creates a new TextBlock with the given format and arbitrary
+// text contents.
+func NewTextBlock(format string, contents TextContents) *TextBlock {
+	return &TextBlock{
+		Format:   format,
+		Contents: contents,
+	}
+}
+
+// Name returns the block name, which is always "text".
+func (t *TextBlock) Name() string { return "text" }
+
+// Args returns the block's arguments: the format as the only element, or
+// nil when no format is set.
+func (t *TextBlock) Args() []string {
+	if t.Format != "" {
+		return []string{t.Format}
+	}
+	return nil
+}
+
+// WriteIndent writes the block header ("text", followed by the escaped
+// format if one is set) indented by n tabs, then the contents indented by
+// n+1 tabs.
+//
+// A block without contents writes only its header. Contents is nil when the
+// block was built with NewTextBlock(format, nil), which parseContentText
+// does before it reads the contents; calling a method on the nil interface
+// would panic.
+func (t *TextBlock) WriteIndent(w io.Writer, n int) error {
+	s := t.Name()
+	if t.Format != "" {
+		s += " " + Escape(t.Format)
+	}
+	if err := writeIndent(w, s, n); err != nil {
+		return err
+	}
+	if t.Contents == nil {
+		return nil
+	}
+	return t.Contents.WriteIndent(w, n+1)
+}
+
+// parseContentText parses a "text" block. The first block argument, if any,
+// is the text format. After the parser has been advanced past the header
+// line, the contents are parsed by parseTextFormat. The block is returned
+// even when an error occurs; its Contents stay nil if advancing fails.
+func parseContentText(p *Parser, block *TokenBlock) (*TextBlock, error) {
+	var format string
+	if len(block.Args) > 0 {
+		format = block.Args[0]
+	}
+	tb := NewTextBlock(format, nil)
+
+	err := p.Next()
+	if err == nil {
+		tb.Contents, err = parseTextFormat(p, block, tb.Format)
+	}
+	return tb, err
+}
+
+// parseTextFormat parses the contents of a text block with the content
+// parser registered for format. The parser p must already be positioned on
+// the first line after the block header.
+//
+// Text in a format without a registered parser is parsed as preformatted
+// text. The previous fallback went through parseContentRaw, which advances
+// the parser once more before reading and therefore skipped the first
+// content line; parseTextPre starts at the current line and yields the same
+// TextPreContents type.
+func parseTextFormat(p *Parser, block *TokenBlock, format string) (TextContents, error) {
+	if parser := GetTextContentParser(format); parser != nil {
+		return parser(p, block)
+	}
+	return parseTextPre(p, block)
+}
+
+// TextContents represents the textual contents of a text block.
+type TextContents interface {
+	WriteIndent(w io.Writer, n int) error // writes the contents to w; the implementations in this file indent them by n tabs
+}
+
+// TextContentParser parses the content lines of the text block given as block.
+type TextContentParser func(p *Parser, block *TokenBlock) (TextContents, error)
+
+var textContentParsers = map[string]TextContentParser{} // registered parsers, keyed by text format name
+
+// GetTextContentParser retrieves the text content parser registered for the
+// format name, or nil if there is none.
+func GetTextContentParser(name string) TextContentParser {
+	return textContentParsers[name]
+}
+
+// RegisterTextContentParser registers parser as the text content parser for
+// the format name, replacing any previous one. A nil parser removes the
+// registration for name.
+func RegisterTextContentParser(name string, parser TextContentParser) {
+	if parser != nil {
+		textContentParsers[name] = parser
+		return
+	}
+	delete(textContentParsers, name)
+}
+
+// TextPlainContents represents a list of simple text paragraphs.
+type TextPlainContents struct {
+	// Paragraphs is the list of paragraphs; each is written on one line.
+	Paragraphs []string
+}
+
+// WriteIndent writes every paragraph, escaped, on its own line indented by
+// n tabs, with one empty line between consecutive paragraphs. It stops at
+// the first write error.
+func (t TextPlainContents) WriteIndent(w io.Writer, n int) error {
+	for i := range t.Paragraphs {
+		if i > 0 {
+			// paragraph separator
+			if err := writeIndent(w, "", 0); err != nil {
+				return err
+			}
+		}
+		line := Escape(t.Paragraphs[i])
+		if err := writeIndent(w, line, n); err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+// NewTextPlainBlock creates a new TextBlock without a format that contains
+// TextPlainContents. The paragraphs slice is copied.
+func NewTextPlainBlock(paragraphs []string) *TextBlock {
+	par := append(make([]string, 0, len(paragraphs)), paragraphs...)
+	contents := TextPlainContents{Paragraphs: par}
+	return NewTextBlock("", contents)
+}
+
+// parseTextPlain parses the lines of a plain text block into
+// TextPlainContents.
+//
+// It reads simple text tokens from p until a non-empty line that is not
+// indented deeper than block is reached, or until p.Next returns an error,
+// which is returned together with the contents parsed so far. Consecutive
+// text lines are joined with a single space into one paragraph. An empty
+// line or a text token without text ends the current paragraph; such a line
+// never produces an empty paragraph of its own.
+func parseTextPlain(p *Parser, block *TokenBlock) (TextContents, error) {
+	txt := TextPlainContents{}
+	var lines []string
+
+	// flush stores the lines collected so far, if any, as one paragraph.
+	flush := func() {
+		if len(lines) == 0 {
+			return
+		}
+		txt.Paragraphs = append(txt.Paragraphs, strings.Join(lines, " "))
+		lines = nil
+	}
+
+	var err error
+	for err == nil {
+		if !p.Empty() && p.Indent() <= block.Indent() {
+			break
+		}
+
+		switch token := p.SimpleText().(type) {
+		case *TokenSimpleText:
+			if token.Text == "" {
+				flush()
+			} else {
+				lines = append(lines, token.Text)
+			}
+		case *TokenEmptyLine:
+			flush()
+		}
+		err = p.Next()
+	}
+	flush()
+	return txt, err
+}
+
+// TextPreContents represents preformatted contents of a text block.
+type TextPreContents struct {
+	// Text is the preformatted content; lines are separated by "\n".
+	Text string
+}
+
+// WriteIndent writes the preformatted text line by line, each line escaped
+// with EscapeNonspace and indented by n tabs. It stops at the first write
+// error.
+func (t TextPreContents) WriteIndent(w io.Writer, n int) error {
+	for _, line := range strings.Split(t.Text, "\n") {
+		escaped := EscapeNonspace(line)
+		if err := writeIndent(w, escaped, n); err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+// NewTextPreBlock creates a new TextBlock with format "pre" that contains
+// TextPreContents.
+//
+// The format has to be "pre": a block without a format is written as a
+// bare "text" header, which is read back with the plain text parser
+// registered for the empty format.
+func NewTextPreBlock(text string) *TextBlock {
+	return NewTextBlock("pre", TextPreContents{text})
+}
+
+// parseTextPre parses the lines of a preformatted text block.
+//
+// Every raw text line is unescaped and kept as its own line. Empty lines
+// between text lines are preserved; empty lines before the first or after
+// the last text line are dropped. Parsing stops at a non-empty line that is
+// not indented deeper than block, or when p.Next returns an error, which is
+// returned together with the contents parsed so far.
+func parseTextPre(p *Parser, block *TokenBlock) (TextContents, error) {
+	var lines []string
+	blanks := 0 // empty lines seen since the last text line
+	var err error
+	for err == nil {
+		if !p.Empty() && p.Indent() <= block.Indent() {
+			break
+		}
+
+		switch token := p.RawText().(type) {
+		case *TokenRawText:
+			for ; blanks > 0; blanks-- {
+				lines = append(lines, "")
+			}
+			lines = append(lines, Unescape(token.Text))
+		case *TokenEmptyLine:
+			if len(lines) > 0 {
+				blanks++
+			}
+		}
+		err = p.Next()
+	}
+	return TextPreContents{strings.Join(lines, "\n")}, err
+}
+
+// RawBlock represents a "raw" content block.
+type RawBlock struct {
+	// Syntax is the syntax of the block contents (first word of block argument).
+	Syntax string
+
+	// Contents is the raw content; lines are separated by "\n".
+	Contents string
+}
+
+// Name returns the block name, which is always "raw".
+func (r *RawBlock) Name() string { return "raw" }
+
+// Args returns the block's arguments: the syntax as the only element, or
+// nil when no syntax is set.
+func (r *RawBlock) Args() []string {
+	if r.Syntax != "" {
+		return []string{r.Syntax}
+	}
+	return nil
+}
+
+// WriteIndent writes the block header ("raw", followed by the escaped
+// syntax if one is set) indented by n tabs, then every line of the contents,
+// unescaped, indented by n+1 tabs. Empty contents write only the header.
+func (r *RawBlock) WriteIndent(w io.Writer, n int) error {
+	header := r.Name()
+	if r.Syntax != "" {
+		header += " " + Escape(r.Syntax)
+	}
+	if err := writeIndent(w, header, n); err != nil {
+		return err
+	}
+	if r.Contents == "" {
+		return nil
+	}
+	for _, line := range strings.Split(r.Contents, "\n") {
+		if err := writeIndent(w, line, n+1); err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+// parseContentRaw parses a "raw" block whose header line is block.
+//
+// The first block argument, if any, becomes the syntax. The body lines are
+// read in raw mode and stored verbatim, joined with "\n". Empty lines before
+// the first and after the last text line are dropped; empty lines in between
+// are kept. Parsing stops at the first non-empty line indented no deeper than
+// block, or when p.Next fails; that error is returned together with whatever
+// was collected so far.
+func parseContentRaw(p *Parser, block *TokenBlock) (*RawBlock, error) {
+	rb := &RawBlock{}
+	if len(block.Args) > 0 {
+		rb.Syntax = block.Args[0]
+	}
+
+	// Step past the header line onto the first body line.
+	err := p.Next()
+	if err != nil {
+		return rb, err
+	}
+
+	var lines []string
+	blanks := 0 // empty lines seen since the last text line
+	for err == nil {
+		if !p.Empty() && p.Indent() <= block.Indent() {
+			break
+		}
+
+		switch tok := p.RawText().(type) {
+		case *TokenRawText:
+			for ; blanks > 0; blanks-- {
+				lines = append(lines, "")
+			}
+			lines = append(lines, tok.Text)
+		case *TokenEmptyLine:
+			if len(lines) > 0 {
+				blanks++
+			}
+		}
+		err = p.Next()
+	}
+
+	rb.Contents = strings.Join(lines, "\n")
+	return rb, err
+}
+
+// ListBlock represents a "list" content block.
+//
+// It embeds ContentBlock, which holds the block's name, arguments and
+// children.
+type ListBlock struct {
+	ContentBlock
+}
+
+// NewListBlock creates a new ListBlock.
+//
+// When ordered is true the block is created with the argument "ordered";
+// otherwise it is created with an empty argument.
+func NewListBlock(ordered bool) *ListBlock {
+	if ordered {
+		return &ListBlock{ContentBlock: *NewContentBlock("list", "ordered")}
+	}
+	return &ListBlock{ContentBlock: *NewContentBlock("list", "")}
+}
+
+// Ordered reports whether the list is in ordered mode, that is, whether the
+// first word of the block argument is "ordered".
+func (b *ListBlock) Ordered() bool {
+	if len(b.args) == 0 {
+		return false
+	}
+	return b.args[0] == "ordered"
+}
+
+// parseContentList parses a "list" block whose header line is block.
+//
+// The list takes its arguments from block, the parser is moved past the
+// header line, and the body is parsed as block content. The list is returned
+// even when an error occurs.
+func parseContentList(p *Parser, block *TokenBlock) (*ListBlock, error) {
+	list := NewListBlock(false)
+	list.args = block.Args
+
+	err := p.Next()
+	if err == nil {
+		err = list.parse(p, block)
+	}
+	return list, err
+}
+
+// TableBlock represents a "table" content block.
+type TableBlock struct {
+	// rows holds the table's rows in document order; the parser stores
+	// both "row" and "header" blocks here.
+	rows []Block
+}
+
+// NewTableBlock returns a new TableBlock without any rows.
+func NewTableBlock() *TableBlock {
+	return new(TableBlock)
+}
+
+// Name returns the name of the block, which is always "table".
+func (t *TableBlock) Name() string { return "table" }
+
+// Args always returns nil: a table block takes no arguments.
+func (t *TableBlock) Args() []string { return nil }
+
+// WriteIndent writes the table to w: the block name at depth n, followed by
+// every row at depth n+1. Writing stops at the first error, which is
+// returned.
+func (t *TableBlock) WriteIndent(w io.Writer, n int) error {
+	err := writeIndent(w, t.Name(), n)
+	for i := 0; err == nil && i < len(t.rows); i++ {
+		err = t.rows[i].WriteIndent(w, n+1)
+	}
+	return err
+}
+
+// Rows returns the rows of the table in the order they were appended. The
+// returned slice is the table's own slice, not a copy.
+func (t *TableBlock) Rows() []Block { return t.rows }
+
+// AppendRow appends row after the table's existing rows.
+func (t *TableBlock) AppendRow(row Block) {
+	rows := append(t.rows, row)
+	t.rows = rows
+}
+
+// parse reads the body of the table whose header line is block and appends
+// the rows it finds to t.
+//
+// The parser must already be positioned on the first line after the header.
+// Parsing stops at the first non-empty line indented no deeper than block, or
+// at the first error, which is returned.
+func (t *TableBlock) parse(p *Parser, block *TokenBlock) (err error) {
+	for err == nil {
+		if !p.Empty() && p.Indent() <= block.Indent() {
+			break
+		}
+
+		token := p.Block()
+		if blk, ok := token.(*TokenBlock); ok {
+			var b Block
+			switch blk.Name {
+			case "row":
+				b, err = parseTableRow(p, blk)
+			case "header":
+				b, err = parseTableHeader(p, blk)
+			default:
+				// Anything other than "row" or "header" is skipped together
+				// with its body.
+				err = parseUnknown(p, blk)
+			}
+			// b is only set by the "row" and "header" cases; the row is kept
+			// even when parsing it returned an error.
+			if b != nil {
+				t.AppendRow(b)
+			}
+		} else if err = p.Next(); err != nil {
+			// The line was empty: move on to the next one.
+			break
+		}
+	}
+	return
+}
+
+// parseContentTable parses a "table" block whose header line is block.
+//
+// The parser is moved past the header line and the body is parsed into a new
+// table. The table is returned even when an error occurs.
+func parseContentTable(p *Parser, block *TokenBlock) (*TableBlock, error) {
+	table := NewTableBlock()
+
+	err := p.Next()
+	if err == nil {
+		err = table.parse(p, block)
+	}
+	return table, err
+}
+
+// RowBlock represents a "row" table block.
+//
+// It embeds ContentBlock, which holds the row's children.
+type RowBlock struct {
+	ContentBlock
+}
+
+// NewRowBlock returns a new RowBlock named "row" with an empty argument.
+func NewRowBlock() *RowBlock {
+	return &RowBlock{ContentBlock: *NewContentBlock("row", "")}
+}
+
+// parseTableRow parses a "row" block whose header line is block.
+//
+// The parser is moved past the header line and the body is parsed as block
+// content. The row is returned even when an error occurs.
+func parseTableRow(p *Parser, block *TokenBlock) (*RowBlock, error) {
+	row := NewRowBlock()
+
+	err := p.Next()
+	if err == nil {
+		err = row.parse(p, block)
+	}
+	return row, err
+}
+
+// HeaderBlock represents a "header" table block.
+//
+// It embeds ContentBlock, which holds the header's children.
+type HeaderBlock struct {
+	ContentBlock
+}
+
+// NewHeaderBlock returns a new HeaderBlock named "header" with an empty
+// argument.
+func NewHeaderBlock() *HeaderBlock {
+	return &HeaderBlock{ContentBlock: *NewContentBlock("header", "")}
+}
+
+// parseTableHeader parses a "header" block whose header line is block.
+//
+// The parser is moved past the header line and the body is parsed as block
+// content. The header is returned even when an error occurs.
+func parseTableHeader(p *Parser, block *TokenBlock) (*HeaderBlock, error) {
+	hdr := NewHeaderBlock()
+
+	err := p.Next()
+	if err == nil {
+		err = hdr.parse(p, block)
+	}
+	return hdr, err
+}
+
+// EmbedBlock represents an "embed" content block.
+type EmbedBlock struct {
+	// Type is the content type (first word of block argument).
+	Type string
+
+	// URL is the content URL (second word of the block argument).
+	URL string
+
+	// Description is the content description (block body as simple text).
+	Description string
+}
+
+// Name returns the name of the block, which is always "embed".
+func (e *EmbedBlock) Name() string {
+	return "embed"
+}
+
+// Args returns the block argument (type and URL).
+//
+// When a URL is set the result is the type followed by the URL; an empty
+// type is reported as "*/*", the same placeholder WriteIndent writes, so the
+// URL is never dropped. Without a URL the result holds only the type.
+func (e *EmbedBlock) Args() []string {
+	if e.URL == "" {
+		return []string{e.Type}
+	}
+	typ := e.Type
+	if typ == "" {
+		typ = "*/*"
+	}
+	return []string{typ, e.URL}
+}
+
+// WriteIndent writes the embed block header and contents indented by n tabs.
+//
+// Nothing is written when the URL is empty. The header line consists of the
+// block name, the escaped type ("*/*" when no type is set) and the escaped
+// URL. The escaped description follows at depth n+1; it is left out when
+// empty, so no stray blank line is emitted after the header.
+func (e *EmbedBlock) WriteIndent(w io.Writer, n int) error {
+	if e.URL == "" {
+		return nil
+	}
+
+	typ := "*/*"
+	if e.Type != "" {
+		typ = Escape(e.Type)
+	}
+	header := e.Name() + " " + typ + " " + Escape(e.URL)
+	if err := writeIndent(w, header, n); err != nil {
+		return err
+	}
+
+	if e.Description == "" {
+		return nil
+	}
+	return writeIndent(w, Escape(e.Description), n+1)
+}
+
+// parseContentEmbed parses an "embed" block whose header line is block.
+//
+// The first two block arguments, when present, become the type and the URL.
+// The block body is read as simple text and becomes the description. The
+// embed block is returned even when an error occurs.
+func parseContentEmbed(p *Parser, block *TokenBlock) (*EmbedBlock, error) {
+	embed := new(EmbedBlock)
+	if args := block.Args; len(args) > 0 {
+		embed.Type = args[0]
+		if len(args) > 1 {
+			embed.URL = args[1]
+		}
+	}
+
+	err := p.Next()
+	if err == nil {
+		embed.Description, err = getSimpleText(p, block)
+	}
+	return embed, err
+}
diff --git a/document.go b/document.go
new file mode 100644
index 0000000..b5cdbe5
--- /dev/null
+++ b/document.go
@@ -0,0 +1,278 @@
+// Package cnm implements CNM document parsing and composition.
+package cnm // import "contnet.org/lib/cnm-go"
+
+import (
+	"bufio"
+	"io"
+	"path"
+	"strings"
+)
+
+// Document represents a CNM document.
+//
+// Each field corresponds to one kind of top-level block. When a document
+// contains several blocks of the same kind, ParseDocument merges them into
+// the same field.
+type Document struct {
+	// Title is the document title (top-level "title" block).
+	Title string
+
+	// Links is a list of document-level hyperlinks (top-level "links" block).
+	Links []Link
+
+	// Site is a sitemap (top-level "site" block).
+	Site Site
+
+	// Content is the document content (top-level "content" block). It is
+	// nil when the document has no "content" block.
+	Content *ContentBlock
+}
+
+// ParseDocument parses a CNM document from r.
+//
+// The top-level blocks "title", "links", "site" and "content" are parsed into
+// the corresponding Document fields; any other top-level block is skipped
+// together with its body. Reaching the end of the input is not an error: an
+// io.EOF from the parser is turned into a nil error. The document parsed so
+// far is returned alongside any other error.
+func ParseDocument(r io.Reader) (doc *Document, err error) {
+	p := NewParser(r)
+	doc = &Document{}
+	err = p.Next()
+	for err == nil {
+		// Interpret the current line as a block header, then step onto the
+		// first line of its body before dispatching on the block name.
+		token := p.Block()
+		if err = p.Next(); err != nil {
+			break
+		}
+		if blk, ok := token.(*TokenBlock); ok {
+			switch blk.Name {
+			case "title":
+				err = doc.parseTitle(p, blk)
+			case "links":
+				err = doc.parseLinks(p, blk)
+			case "site":
+				err = doc.Site.parse(p, blk)
+			case "content":
+				// Later "content" blocks are parsed into the same
+				// ContentBlock as the first one.
+				if doc.Content == nil {
+					doc.Content = &ContentBlock{name: "content"}
+				}
+				err = doc.Content.parse(p, blk)
+			default:
+				// discard lines inside this block
+				for err == nil {
+					if !p.Empty() && p.Indent() <= blk.Indent() {
+						break
+					}
+					err = p.Next()
+				}
+			}
+		}
+	}
+	if err == io.EOF {
+		err = nil
+	}
+	return
+}
+
+// Write writes the document to w in CNM format.
+//
+// The "title", "links", "site" and "content" blocks are written in that
+// order. A block is left out when its field is empty (or nil, for Content).
+// Output goes through a buffered writer that is flushed only after every
+// block has been written; on the first error Write returns without flushing.
+func (doc *Document) Write(w io.Writer) error {
+	bw := bufio.NewWriter(w)
+
+	if doc.Title != "" {
+		err := writeIndent(bw, "title", 0)
+		if err == nil {
+			err = writeIndent(bw, Escape(doc.Title), 1)
+		}
+		if err != nil {
+			return err
+		}
+	}
+
+	if len(doc.Links) > 0 {
+		if err := writeIndent(bw, "links", 0); err != nil {
+			return err
+		}
+		for i := range doc.Links {
+			if err := doc.Links[i].WriteIndent(bw, 1); err != nil {
+				return err
+			}
+		}
+	}
+
+	// Only the children of the root sitemap node are written; the root
+	// itself stands for the "site" block.
+	if children := doc.Site.Children; len(children) > 0 {
+		if err := writeIndent(bw, "site", 0); err != nil {
+			return err
+		}
+		for i := range children {
+			if err := children[i].WriteIndent(bw, 1); err != nil {
+				return err
+			}
+		}
+	}
+
+	if doc.Content != nil {
+		if err := doc.Content.WriteIndent(bw, 0); err != nil {
+			return err
+		}
+	}
+
+	return bw.Flush()
+}
+
+// parseTitle reads the body of a "title" block as simple text and adds it to
+// the document title.
+//
+// The text of the first title block becomes the title; the text of each
+// further title block is appended after a single space. A title block
+// without any text leaves the title untouched, so no trailing space is
+// added. The error from reading the body is returned.
+func (doc *Document) parseTitle(p *Parser, block *TokenBlock) (err error) {
+	s, err := getSimpleText(p, block)
+	switch {
+	case s == "":
+		// Nothing to add.
+	case doc.Title == "":
+		doc.Title = s
+	default:
+		doc.Title += " " + s
+	}
+	return err
+}
+
+// parseLinks reads the body of a "links" block and appends one Link to
+// doc.Links for every entry.
+//
+// For each entry the first word of the line is the URL, the remaining words
+// joined by spaces are the name, and the entry's body, read as simple text,
+// is the description. Entries with an empty first word are skipped together
+// with their bodies. Parsing stops at the first non-empty line indented no
+// deeper than block, or at the first error, which is returned.
+func (doc *Document) parseLinks(p *Parser, block *TokenBlock) (err error) {
+	for err == nil {
+		if !p.Empty() && p.Indent() <= block.Indent() {
+			break
+		}
+
+		token := p.Block()
+		blk, ok := token.(*TokenBlock)
+		if !ok {
+			// Empty line: it must still be consumed, otherwise the loop
+			// would look at the same line forever.
+			err = p.Next()
+			continue
+		}
+		if blk.Name == "" {
+			err = parseUnknown(p, blk)
+			continue
+		}
+
+		// The link is stored before its description is read, so it is kept
+		// even when reading the description fails.
+		doc.Links = append(doc.Links, Link{
+			URL:  blk.Name,
+			Name: strings.Join(blk.Args, " "),
+		})
+		if err = p.Next(); err != nil {
+			break
+		}
+		doc.Links[len(doc.Links)-1].Description, err = getSimpleText(p, blk)
+	}
+	return
+}
+
+// getSimpleText reads the body of block as simple text.
+//
+// Every line of the body is parsed as simple text; the non-empty results are
+// joined with single spaces. Reading stops at the first non-empty line
+// indented no deeper than block, or when p.Next returns an error. The text
+// collected so far is returned together with that error.
+func getSimpleText(p *Parser, block *TokenBlock) (s string, err error) {
+	var parts []string
+	for err == nil {
+		if !p.Empty() && p.Indent() <= block.Indent() {
+			break
+		}
+
+		if text, ok := p.SimpleText().(*TokenSimpleText); ok && text.Text != "" {
+			parts = append(parts, text.Text)
+		}
+
+		err = p.Next()
+	}
+	return strings.Join(parts, " "), err
+}
+
+// Link represents a document-level hyperlink in the "links" top-level block.
+type Link struct {
+	// URL is the hyperlink URL (the first word of the link's line).
+	URL string
+
+	// Name is the hyperlink text (the remaining words of the link's line,
+	// joined by spaces).
+	Name string
+
+	// Description is the description of the hyperlink (the body below the
+	// link's line, read as simple text).
+	Description string
+}
+
+// WriteIndent writes the link to w.
+//
+// The escaped URL, followed by the escaped name when one is set, is written
+// at depth n. A non-empty description is escaped and written on the next line
+// at depth n+1.
+func (link Link) WriteIndent(w io.Writer, n int) error {
+	header := Escape(link.URL)
+	if link.Name != "" {
+		header += " " + Escape(link.Name)
+	}
+
+	err := writeIndent(w, header, n)
+	if err != nil || link.Description == "" {
+		return err
+	}
+	return writeIndent(w, Escape(link.Description), n+1)
+}
+
+// Site represents a node in the sitemap in the "site" top-level block.
+//
+// The Site stored in Document.Site is the root of the tree: only its
+// Children are filled in by the parser and written by Document.Write.
+type Site struct {
+	// Path is the node's path fragment. The parser cleans it with
+	// path.Clean and strips leading and trailing slashes.
+	Path string
+
+	// Name is the node's name.
+	Name string
+
+	// Children are the nodes below this node.
+	Children []Site
+}
+
+// WriteIndent writes the sitemap node and everything below it to w.
+//
+// The escaped path, followed by the escaped name when one is set, is written
+// at depth n; the children follow recursively at depth n+1. Writing stops at
+// the first error, which is returned.
+func (site Site) WriteIndent(w io.Writer, n int) error {
+	header := Escape(site.Path)
+	if site.Name != "" {
+		header += " " + Escape(site.Name)
+	}
+
+	err := writeIndent(w, header, n)
+	for i := 0; err == nil && i < len(site.Children); i++ {
+		err = site.Children[i].WriteIndent(w, n+1)
+	}
+	return err
+}
+
+// parse reads the body of block and appends one child node to site for every
+// entry, descending recursively into each entry's own body.
+//
+// The parser must already be positioned on the first line after block's
+// header. Parsing stops at the first non-empty line indented no deeper than
+// block, or at the first error, which is returned.
+func (site *Site) parse(p *Parser, block *TokenBlock) (err error) {
+	for err == nil {
+		if !p.Empty() && p.Indent() <= block.Indent() {
+			break
+		}
+
+		token := p.Block()
+		if blk, ok := token.(*TokenBlock); ok {
+			if blk.Name == "" {
+				// An entry with an empty first word is skipped together
+				// with its body.
+				err = parseUnknown(p, blk)
+			} else {
+				// First word is the path, the remaining words form the name.
+				s := Site{
+					Path: strings.Trim(path.Clean(blk.Name), "/"),
+					Name: strings.Join(blk.Args, " "),
+				}
+				// The node is appended before its children are parsed, so it
+				// is kept even if parsing them fails.
+				site.Children = append(site.Children, s)
+				if err = p.Next(); err != nil {
+					break
+				}
+				err = site.Children[len(site.Children)-1].parse(p, blk)
+			}
+		} else {
+			// Empty line: move on to the next one.
+			err = p.Next()
+		}
+	}
+	return
+}
+
+// parseUnknown skips block: it moves the parser past the header line and
+// then discards every line inside the block. It stops on the first non-empty
+// line indented no deeper than block, or returns the first error from
+// p.Next.
+func parseUnknown(p *Parser, block *TokenBlock) (err error) {
+	for err = p.Next(); err == nil; err = p.Next() {
+		if !p.Empty() && p.Indent() <= block.Indent() {
+			break
+		}
+	}
+	return
+}
+
+// writeIndent writes s to w as a single line, preceded by depth tab
+// characters and followed by a newline.
+//
+// An empty s is written as a bare newline without any indentation. The line
+// is handed to w in one Write call and the error of that call is returned.
+func writeIndent(w io.Writer, s string, depth int) error {
+	// Indentation for common depths is sliced from this constant; deeper
+	// levels are built with strings.Repeat.
+	const tabs = "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t"
+
+	var line string
+	switch {
+	case s == "":
+		line = "\n"
+	case depth == 0:
+		line = s + "\n"
+	case depth <= len(tabs):
+		line = tabs[:depth] + s + "\n"
+	default:
+		line = strings.Repeat("\t", depth) + s + "\n"
+	}
+
+	_, err := w.Write([]byte(line))
+	return err
+}
diff --git a/parse.go b/parse.go
new file mode 100644
index 0000000..02f7eb2
--- /dev/null
+++ b/parse.go
@@ -0,0 +1,189 @@
+package cnm
+
+import (
+	"bufio"
+	"io"
+)
+
+// TopLevel represents the top-level block.
+//
+// It is the block every new Parser starts in. It has no parent, no name and
+// no arguments, and its indentation of -1 places it above every real line,
+// whose indentation is never negative.
+var TopLevel = &TokenBlock{
+	TokenLine: TokenLine{
+		Indentation: -1,
+		RawLine:     "",
+		LineNo:      0,
+	},
+	Parent: nil,
+	Name:   "",
+	Args:   nil,
+}
+
+// Parser parses a CNM document by lines.
+//
+// Next reads a line; Block, RawText and SimpleText then interpret that line
+// in the respective mode.
+type Parser struct {
+	r       *bufio.Reader // source of the document
+	line    int           // number of lines read so far
+	block   *TokenBlock   // innermost block the parser is currently in
+	current *TokenLine    // line read by the last Next; nil before the first
+	end     bool          // set once the synthetic empty line at EOF was returned
+}
+
+// NewParser returns a Parser that reads a CNM document from r.
+//
+// The parser starts inside the TopLevel block with no line read yet; call
+// Next before using any of the line accessors.
+func NewParser(r io.Reader) *Parser {
+	p := new(Parser)
+	p.r = bufio.NewReader(r)
+	p.block = TopLevel
+	return p
+}
+
+// Line returns the number of the line most recently read from the document.
+// It is 0 before the first call to Next and 1 after the first line.
+func (p *Parser) Line() int { return p.line }
+
+// Next retrieves the next line and makes it the current line.
+//
+// The line's indentation is the number of leading tab characters, limited to
+// one level deeper than the block the parser is currently in; any further
+// tabs stay part of the line's content. If the line cannot be read, the
+// error is returned and the current line is left unchanged.
+func (p *Parser) Next() error {
+	line, err := p.nextLine()
+	if err != nil {
+		return err
+	}
+	// Count the leading tabs.
+	indent := 0
+	for _, c := range line {
+		if c != '\t' {
+			break
+		}
+		indent++
+	}
+	// A line cannot be nested more than one level below the current block.
+	if indent > p.block.Indent()+1 {
+		indent = p.block.Indent() + 1
+	}
+	p.current = &TokenLine{
+		Indentation: indent,
+		RawLine:     line,
+		LineNo:      p.line,
+	}
+	// A non-empty line indented no deeper than the current block ends that
+	// block; the parser steps up one level to its parent.
+	if p.current.Indent() <= p.block.Indent() && !p.Empty() {
+		p.block = p.block.Parent
+	}
+	return nil
+}
+
+// Indent returns the indentation of the current line.
+//
+// Before the first line has been read it returns -1.
+func (p *Parser) Indent() int {
+	if cur := p.current; cur != nil {
+		return cur.Indent()
+	}
+	return -1
+}
+
+// Empty reports whether the current line is empty, that is, whether nothing
+// follows its indentation. It also returns true when no line has been read
+// yet.
+func (p *Parser) Empty() bool {
+	cur := p.current
+	return cur == nil || cur.Indent() == len(cur.Raw())
+}
+
+// Block parses the current line in block mode.
+//
+// Returns a TokenBlock if the line was not empty, otherwise TokenEmptyLine. In
+// block mode, a line is empty even if its indentation exceeds the block
+// content indentation, as long as it only contains tab characters.
+//
+// A returned TokenBlock has the parser's current block as its parent and
+// becomes the parser's current block itself.
+//
+// Next() must have been called before calling Block().
+func (p *Parser) Block() Token {
+	// Everything after the line's indentation.
+	line := p.current.Raw()[p.current.Indent():]
+
+	// Disabled explicit check for lines that consist of tabs only.
+	/*indent := 0
+	for _, c := range line {
+		if c != '\t' {
+			break
+		}
+		indent++
+	}
+	if len(line) == indent {
+		return &TokenEmptyLine{*p.current}
+	}*/
+
+	// Split the line into words: the first one is the block name, the rest
+	// are its arguments.
+	ss := SplitUnescape(line)
+	if len(ss) == 0 || len(ss) == 1 && ss[0] == "" {
+		return &TokenEmptyLine{*p.current}
+	}
+
+	block := TokenBlock{
+		TokenLine: *p.current,
+		Parent:    p.block,
+	}
+	block.Name = ss[0]
+	if len(ss) > 1 {
+		block.Args = ss[1:]
+	}
+
+	// Lines read from now on are nested relative to this block.
+	p.block = &block
+
+	return &block
+}
+
+// RawText parses the current line as raw text.
+//
+// For a non-empty line it returns a TokenRawText whose Text is everything
+// after the line's indentation, taken verbatim. For an empty line it returns
+// a TokenEmptyLine.
+//
+// Next() must have been called before calling RawText().
+func (p *Parser) RawText() Token {
+	if !p.Empty() {
+		cur := p.current
+		return &TokenRawText{
+			TokenLine: *cur,
+			Text:      cur.Raw()[cur.Indent():],
+		}
+	}
+	return &TokenEmptyLine{*p.current}
+}
+
+// SimpleText parses the current line as simple text.
+//
+// For a non-empty line it returns a TokenSimpleText whose Text is everything
+// after the line's indentation, passed through ParseSimpleText. For an empty
+// line it returns a TokenEmptyLine.
+//
+// Next() must have been called before calling SimpleText().
+func (p *Parser) SimpleText() Token {
+	if !p.Empty() {
+		cur := p.current
+		content := cur.Raw()[cur.Indent():]
+		return &TokenSimpleText{
+			TokenLine: *cur,
+			Text:      ParseSimpleText(content),
+		}
+	}
+	return &TokenEmptyLine{*p.current}
+}
+
+// nextLine reads one line from the underlying reader and returns it with all
+// '\n', '\r' and U+0000 characters removed.
+//
+// A final line without a trailing newline is returned normally. When the
+// input is exhausted, one extra empty line is returned first; only the read
+// after that reports io.EOF. The line counter is incremented on every call.
+func (p *Parser) nextLine() (string, error) {
+	raw, err := p.r.ReadString('\n')
+	if err == io.EOF {
+		switch {
+		case raw != "":
+			// Last line of input, not terminated by a newline.
+			err = nil
+		case !p.end: // XXX
+			// Hand out one synthetic empty line before reporting EOF.
+			raw, p.end, err = "\n", true, nil
+		}
+	}
+
+	kept := make([]rune, 0, len(raw))
+	for _, r := range raw {
+		if r == '\n' || r == '\r' || r == '\x00' {
+			continue
+		}
+		kept = append(kept, r)
+	}
+
+	p.line++
+	return string(kept), err
+}
diff --git a/parse_test.go b/parse_test.go
new file mode 100644
index 0000000..b47dc14
--- /dev/null
+++ b/parse_test.go
@@ -0,0 +1,701 @@
+package cnm
+
+import (
+	"reflect"
+	"strings"
+	"testing"
+
+	"github.com/davecgh/go-spew/spew"
+)
+
+// parseTests maps CNM input text to the Document that ParseDocument is
+// expected to produce for it. TestParse runs every entry as a subtest.
+//
+// Whitespace inside the inputs (tabs, trailing spaces, lines that contain
+// only tabs) is significant and must not be reformatted.
+var parseTests = map[string]*Document{
+	"": &Document{},
+
+	"foo\n\tbar\ntitle\n\ttest": &Document{
+		Title: "test",
+	},
+
+	"foo\n\tbar\ntitle\n\ttest\nfoo\n\tbar": &Document{
+		Title: "test",
+	},
+
+	"title\n\ttest": &Document{
+		Title: "test",
+	},
+
+	"\ntitle\n\ttest\n": &Document{
+		Title: "test",
+	},
+
+	"title\n\ttest\n\n": &Document{
+		Title: "test",
+	},
+
+	"\ntitle\n\t\t\t\t\t\n\t\tfoo bar": &Document{
+		Title: "foo bar",
+	},
+
+	"site\n\tfoo\nsite\n\t\tbar\n": &Document{
+		Site: Site{Children: []Site{
+			Site{Path: "foo"},
+		}},
+	},
+
+	"content\n\ttext\n\t\tfoo\ncontent\n\t\tbar\n": &Document{
+		Content: &ContentBlock{
+			name: "content",
+			children: []Block{
+				&TextBlock{
+					Format: "",
+					Contents: TextPlainContents{
+						Paragraphs: []string{"foo"},
+					},
+				},
+			},
+		},
+	},
+
+	"\n\ttitle\n\t\t\t\t\t\n\tfoo bar": &Document{},
+
+	"\ttitle\n\t\tfoo\n": &Document{},
+
+	"\tsite\n\t\tfoo\n": &Document{},
+
+	"\tlinks\n\t\tfoo\n": &Document{},
+
+	"links\n\tfoo": &Document{
+		Links: []Link{
+			Link{
+				URL: "foo",
+			},
+		},
+	},
+
+	"qwe\ntitle\n\tasd": &Document{
+		Title: "asd",
+	},
+
+	"links\n\t qwe\n\tasd": &Document{
+		Links: []Link{
+			Link{URL: "asd"},
+		},
+	},
+
+	"site\n\t qwe\n\tasd": &Document{
+		Site: Site{Children: []Site{
+			Site{Path: "asd"},
+		}},
+	},
+
+	"site\n\tba\\nr": &Document{
+		Site: Site{
+			Children: []Site{
+				Site{
+					Path: "ba\nr",
+				},
+			},
+		},
+	},
+
+	"site\n\t\t\t\tba\\nr": &Document{},
+
+	"site\n\tfoo\tbar": &Document{
+		Site: Site{
+			Children: []Site{
+				Site{
+					Path: "foo",
+					Name: "bar",
+				},
+			},
+		},
+	},
+
+	"\t\tsite\n\t\t\t\tfoo": &Document{},
+
+	"\tsite\n\tbar": &Document{},
+
+	"content\n\tsection test\n": &Document{
+		Content: &ContentBlock{
+			name: "content",
+			args: nil,
+			children: []Block{
+				&SectionBlock{ContentBlock{
+					name: "section",
+					args: []string{"test"},
+				}},
+			},
+		},
+	},
+
+	"content\n\tnosuchblock\n\tsection test\n": &Document{
+		Content: &ContentBlock{
+			name: "content",
+			args: nil,
+			children: []Block{
+				&SectionBlock{ContentBlock{
+					name: "section",
+					args: []string{"test"},
+				}},
+			},
+		},
+	},
+
+	"content\n\tnosuchblock\n\tsection test\n\n\tnosuchblock2": &Document{
+		Content: &ContentBlock{
+			name: "content",
+			args: nil,
+			children: []Block{
+				&SectionBlock{ContentBlock{
+					name: "section",
+					args: []string{"test"},
+				}},
+			},
+		},
+	},
+
+	"content\n\tsection\n\t\tnosuchblock\n\t\tsection\n\t\t\ttext\n\t\t\t\ttest": &Document{
+		Content: &ContentBlock{
+			name: "content",
+			args: nil,
+			children: []Block{
+				&SectionBlock{ContentBlock{
+					name: "section",
+					args: nil,
+					children: []Block{
+						&SectionBlock{ContentBlock{
+							name: "section",
+							args: nil,
+							children: []Block{
+								&TextBlock{
+									Format: "",
+									Contents: TextPlainContents{Paragraphs: []string{
+										"test",
+									}},
+								},
+							},
+						}},
+					},
+				}},
+			},
+		},
+	},
+
+	"content\n\ttable\n\t\tnosuchblock\n\t\trow\n\t\t\ttext\n\t\t\t\ttest": &Document{
+		Content: &ContentBlock{
+			name: "content",
+			args: nil,
+			children: []Block{
+				&TableBlock{rows: []Block{
+					&RowBlock{ContentBlock{
+						name: "row",
+						args: nil,
+						children: []Block{
+							&TextBlock{
+								Format: "",
+								Contents: TextPlainContents{Paragraphs: []string{
+									"test",
+								}},
+							},
+						},
+					}},
+				}},
+			},
+		},
+	},
+
+	// Repeated top-level blocks, over-indented lines and escaped spaces.
+	"site\n\t\ttest\n\tfoo\\ bar baz\n" +
+		"links\n\tfoo\\ bar baz\n\t\t\tquux\n" +
+		"content\n" +
+		"links\n\t\ttest\n" +
+		"content\n" +
+		"\tsection\n\tsection qweasd\n" +
+		"\tsection foo\\ bar  baz\n\t\ttext\n\t\t\ttest\n": &Document{
+		Site: Site{
+			Children: []Site{
+				Site{
+					Path: "foo bar",
+					Name: "baz",
+				},
+			},
+		},
+		Links: []Link{
+			Link{
+				URL:         "foo bar",
+				Name:        "baz",
+				Description: "quux",
+			},
+		},
+		Content: &ContentBlock{
+			name: "content",
+			args: nil,
+			children: []Block{
+				&SectionBlock{ContentBlock{
+					name: "section",
+					args: nil,
+				}},
+				&SectionBlock{ContentBlock{
+					name: "section",
+					args: []string{"qweasd"},
+				}},
+				&SectionBlock{ContentBlock{
+					name: "section",
+					args: []string{"foo bar baz"},
+					children: []Block{
+						&TextBlock{
+							Contents: TextPlainContents{[]string{
+								"test",
+							}},
+						},
+					},
+				}},
+			},
+		},
+	},
+
+	// Several title, links and site blocks are merged into one document.
+	"title\n\tfoo bar\n" +
+		"links\n\tqwe asd\n\tzxc 123\n" +
+		"site\n\tfoo\n\t\tbar\n\t\tbaz/quux\n\t\t\t123\n" +
+		"title\n\tbaz\n" +
+		"links\n\tfoo\n" +
+		"site\n\ttest": &Document{
+		Title: "foo bar baz",
+		Links: []Link{
+			Link{"qwe", "asd", ""},
+			Link{"zxc", "123", ""},
+			Link{"foo", "", ""},
+		},
+		Site: Site{
+			Children: []Site{
+				Site{
+					Path: "foo",
+					Children: []Site{
+						Site{Path: "bar"},
+						Site{
+							Path: "baz/quux",
+							Children: []Site{
+								Site{Path: "123"},
+							},
+						},
+					},
+				},
+				Site{Path: "test"},
+			},
+		},
+	},
+
+	// A larger document that exercises every content block type.
+	`
+thing stuff
+	whatever
+title blah
+
+
+	Test 
+
+title 
+	  document
+
+content
+	section   Test   section  
+		text
+			This is \n just a
+		text pre
+
+			  t  e \n s  t  
+				
+
+
+			preformatted text
+
+		raw text/plain
+			of various \n features
+	
+		section of\ the
+			table
+				header
+					text
+						Column 1
+					text
+						Column 2
+				row
+					text
+						CNM
+					text
+						document
+										
+						format
+				row
+					section
+						lorem
+					list
+						text
+							ipsum
+						list ordered
+							list unordered
+								text
+									dolor
+			
+									sit
+									amet
+	embed text/cnm cnp://example.com/ stuff
+		thing
+		whatever
+`: &Document{
+		Title: "Test document",
+		Content: &ContentBlock{
+			name: "content",
+			args: nil,
+			children: []Block{
+				&SectionBlock{ContentBlock{
+					name: "section",
+					args: []string{"Test section"},
+					children: []Block{
+						&TextBlock{
+							Contents: TextPlainContents{[]string{
+								"This is \n just a",
+							}},
+						},
+						&TextBlock{
+							Format: "pre",
+							Contents: TextPreContents{
+								"  t  e \n s  t  \n\t\n\n\npreformatted text",
+							},
+						},
+						&RawBlock{
+							Syntax:   "text/plain",
+							Contents: "of various \\n features",
+						},
+						&SectionBlock{ContentBlock{
+							name: "section",
+							args: []string{"of the"},
+							children: []Block{
+								&TableBlock{[]Block{
+									&HeaderBlock{ContentBlock{
+										name: "header",
+										args: nil,
+										children: []Block{
+											&TextBlock{
+												Contents: TextPlainContents{[]string{
+													"Column 1",
+												}},
+											},
+											&TextBlock{
+												Contents: TextPlainContents{[]string{
+													"Column 2",
+												}},
+											},
+										},
+									}},
+									&RowBlock{ContentBlock{
+										name: "row",
+										args: nil,
+										children: []Block{
+											&TextBlock{
+												Contents: TextPlainContents{[]string{
+													"CNM",
+												}},
+											},
+											&TextBlock{
+												Contents: TextPlainContents{[]string{
+													"document",
+													"format",
+												}},
+											},
+										},
+									}},
+									&RowBlock{ContentBlock{
+										name: "row",
+										args: nil,
+										children: []Block{
+											&SectionBlock{ContentBlock{
+												name: "section",
+												args: nil,
+											}},
+											&ListBlock{ContentBlock{
+												name: "list",
+												args: nil,
+												children: []Block{
+													&TextBlock{
+														Contents: TextPlainContents{[]string{
+															"ipsum",
+														}},
+													},
+													&ListBlock{ContentBlock{
+														name: "list",
+														args: []string{"ordered"},
+														children: []Block{
+															&ListBlock{ContentBlock{
+																name: "list",
+																args: []string{"unordered"},
+																children: []Block{
+																	&TextBlock{
+																		Contents: TextPlainContents{[]string{
+																			"dolor",
+																			"sit amet",
+																		}},
+																	},
+																},
+															}},
+														},
+													}},
+												},
+											}},
+										},
+									}},
+								}},
+							},
+						}},
+					},
+				}},
+				&EmbedBlock{
+					Type:        "text/cnm",
+					URL:         "cnp://example.com/",
+					Description: "thing whatever",
+				},
+			},
+		},
+	},
+}
+
+// TestParse parses every input in parseTests and checks that the resulting
+// document equals the expected one. Each input runs as its own subtest, named
+// after the input text.
+func TestParse(t *testing.T) {
+	for input, want := range parseTests {
+		t.Run(input, func(t *testing.T) {
+			got, err := ParseDocument(strings.NewReader(input))
+			if err != nil {
+				t.Fatalf("ParseDocument(%q): error: %v", input, err)
+			}
+			if documentEqual(got, want) {
+				return
+			}
+			t.Fatalf("ParseDocument(%q):\nexpected:\n%s\n     got:\n%s", input, reprDoc(want), reprDoc(got))
+		})
+	}
+}
+
+// reprDoc returns a printable dump of d (produced with spew) for use in test
+// failure messages.
+func reprDoc(d *Document) string {
+	dump := spew.Sdump(d)
+	return dump
+}
+
+// documentEqual reports whether a and b have the same title, links, sitemap
+// and content.
+func documentEqual(a, b *Document) bool {
+	if a.Title != b.Title || len(a.Links) != len(b.Links) {
+		return false
+	}
+	for i, link := range a.Links {
+		if !linkEqual(link, b.Links[i]) {
+			return false
+		}
+	}
+	return siteEqual(a.Site, b.Site) && contentBlockEqual(a.Content, b.Content)
+}
+
+// linkEqual reports whether a and b agree in every field.
+func linkEqual(a, b Link) bool {
+	same := a == b
+	return same
+}
+
+// siteEqual reports whether the sitemap trees rooted at a and b are equal:
+// same path, same name and pairwise equal children in the same order.
+func siteEqual(a, b Site) bool {
+	if a.Path != b.Path || a.Name != b.Name || len(a.Children) != len(b.Children) {
+		return false
+	}
+	for i, child := range a.Children {
+		if !siteEqual(child, b.Children[i]) {
+			return false
+		}
+	}
+	return true
+}
+
+// blockEqual reports whether a and b are blocks of the same concrete type
+// with equal contents. It dispatches on the type of a to the matching
+// comparison helper; a block of a type not listed here never compares equal.
+func blockEqual(a, b Block) bool {
+	switch x := a.(type) {
+	case *SectionBlock:
+		y, ok := b.(*SectionBlock)
+		return ok && sectionBlockEqual(x, y)
+
+	case *TextBlock:
+		y, ok := b.(*TextBlock)
+		return ok && textBlockEqual(x, y)
+
+	case *RawBlock:
+		y, ok := b.(*RawBlock)
+		return ok && rawBlockEqual(x, y)
+
+	case *ListBlock:
+		y, ok := b.(*ListBlock)
+		return ok && listBlockEqual(x, y)
+
+	case *TableBlock:
+		y, ok := b.(*TableBlock)
+		return ok && tableBlockEqual(x, y)
+
+	case *HeaderBlock:
+		y, ok := b.(*HeaderBlock)
+		return ok && headerBlockEqual(x, y)
+
+	case *RowBlock:
+		y, ok := b.(*RowBlock)
+		return ok && rowBlockEqual(x, y)
+
+	case *EmbedBlock:
+		y, ok := b.(*EmbedBlock)
+		return ok && embedBlockEqual(x, y)
+
+	case *ContentBlock:
+		y, ok := b.(*ContentBlock)
+		return ok && contentBlockEqual(x, y)
+	}
+
+	// shouldn't happen
+	return false
+}
+
+// contentBlockEqual reports whether a and b are equal content blocks: both
+// nil, or both non-nil with the same name, the same arguments and pairwise
+// equal children.
+func contentBlockEqual(a, b *ContentBlock) bool {
+	if a == nil || b == nil {
+		return a == nil && b == nil
+	}
+	if a.Name() != b.Name() {
+		return false
+	}
+
+	argsA, argsB := a.Args(), b.Args()
+	if len(argsA) != len(argsB) {
+		return false
+	}
+	for i, arg := range argsA {
+		if arg != argsB[i] {
+			return false
+		}
+	}
+
+	kidsA, kidsB := a.Children(), b.Children()
+	if len(kidsA) != len(kidsB) {
+		return false
+	}
+	for i, kid := range kidsA {
+		if !blockEqual(kid, kidsB[i]) {
+			return false
+		}
+	}
+	return true
+}
+
+// sectionBlockEqual compares two section blocks through their embedded
+// content blocks.
+func sectionBlockEqual(a, b *SectionBlock) bool {
+	ca, cb := &a.ContentBlock, &b.ContentBlock
+	return contentBlockEqual(ca, cb)
+}
+
+// textBlockEqual reports whether a and b have the same format and equal
+// contents.
+func textBlockEqual(a, b *TextBlock) bool {
+	return a.Format == b.Format && textContentsEqual(a.Contents, b.Contents)
+}
+
+// textContentsEqual reports whether a and b are text contents of the same
+// type with equal values. Plain and preformatted contents use their dedicated
+// helpers; any other type is compared with reflect.DeepEqual.
+func textContentsEqual(a, b TextContents) bool {
+	switch x := a.(type) {
+	case TextPlainContents:
+		y, ok := b.(TextPlainContents)
+		return ok && textPlainContentsEqual(x, y)
+
+	case TextPreContents:
+		y, ok := b.(TextPreContents)
+		return ok && textPreContentsEqual(x, y)
+	}
+	return reflect.TypeOf(a) == reflect.TypeOf(b) && reflect.DeepEqual(a, b)
+}
+
+// textPlainContentsEqual reports whether a and b hold the same paragraphs in
+// the same order.
+func textPlainContentsEqual(a, b TextPlainContents) bool {
+	if len(a.Paragraphs) != len(b.Paragraphs) {
+		return false
+	}
+	for i, para := range a.Paragraphs {
+		if para != b.Paragraphs[i] {
+			return false
+		}
+	}
+	return true
+}
+
+// textPreContentsEqual reports whether a and b hold the same text.
+func textPreContentsEqual(a, b TextPreContents) bool {
+	same := a == b
+	return same
+}
+
+// rawBlockEqual reports whether a and b have the same syntax and contents.
+func rawBlockEqual(a, b *RawBlock) bool {
+	x, y := *a, *b
+	return x == y
+}
+
+// listBlockEqual compares two list blocks through their embedded content
+// blocks.
+func listBlockEqual(a, b *ListBlock) bool {
+	ca, cb := &a.ContentBlock, &b.ContentBlock
+	return contentBlockEqual(ca, cb)
+}
+
+// tableBlockEqual reports whether a and b have the same number of rows and
+// pairwise equal rows.
+func tableBlockEqual(a, b *TableBlock) bool {
+	rowsA, rowsB := a.Rows(), b.Rows()
+	if len(rowsA) != len(rowsB) {
+		return false
+	}
+	for i, row := range rowsA {
+		if !blockEqual(row, rowsB[i]) {
+			return false
+		}
+	}
+	return true
+}
+
+// rowBlockEqual compares two row blocks through their embedded content
+// blocks.
+func rowBlockEqual(a, b *RowBlock) bool {
+	ca, cb := &a.ContentBlock, &b.ContentBlock
+	return contentBlockEqual(ca, cb)
+}
+
+// headerBlockEqual compares two header blocks through their embedded content
+// blocks.
+func headerBlockEqual(a, b *HeaderBlock) bool {
+	ca, cb := &a.ContentBlock, &b.ContentBlock
+	return contentBlockEqual(ca, cb)
+}
+
+// embedBlockEqual reports whether a and b have the same type, URL and
+// description.
+func embedBlockEqual(a, b *EmbedBlock) bool {
+	x, y := *a, *b
+	return x == y
+}
diff --git a/simpletext.go b/simpletext.go
new file mode 100644
index 0000000..78e089e
--- /dev/null
+++ b/simpletext.go
@@ -0,0 +1,196 @@
+package cnm
+
+import (
+	"bytes"
+	"regexp"
+	"strconv"
+	"strings"
+)
+
+// ParseSimpleText parses raw as simple text: whitespace is collapsed first
+// (see CollapseWhitespace) and escape sequences are resolved afterwards (see
+// Unescape).
+func ParseSimpleText(raw string) string {
+	collapsed := CollapseWhitespace(raw)
+	return Unescape(collapsed)
+}
+
+// CollapseWhitespace collapses sequences of non-escaped whitespace in raw CNM
+// simple text into single spaces.
+//
+// Tabs, line feeds, form feeds and carriage returns always separate fields and
+// are turned into a single space each, even directly after a backslash. Runs
+// of unescaped spaces are then reduced to one space, and unescaped spaces at
+// the start and end of the text are dropped. A backslash followed by a space
+// is kept verbatim as `\ `; all other backslashes are copied through
+// unchanged, so the result is still escaped text.
+func CollapseWhitespace(raw string) string {
+	isBreak := func(r rune) bool {
+		return r == '\t' || r == '\n' || r == '\f' || r == '\r'
+	}
+	joined := strings.Join(strings.FieldsFunc(raw, isBreak), " ")
+
+	var (
+		out          bytes.Buffer
+		started      bool // a backslash or ordinary rune has been seen
+		pendingSlash bool // previous rune was an unpaired backslash
+		pendingSpace bool // an unescaped space is waiting to be emitted
+	)
+
+	// flushSpace emits the pending space unless it would lead the output,
+	// then records that real content has begun.
+	flushSpace := func() {
+		if pendingSpace && started {
+			out.WriteByte(' ')
+		}
+		pendingSpace = false
+		started = true
+	}
+
+	for _, r := range joined {
+		if r == ' ' {
+			if pendingSlash {
+				out.WriteString("\\ ")
+				pendingSlash = false
+			} else {
+				pendingSpace = true
+			}
+			continue
+		}
+
+		if r == '\\' {
+			if pendingSlash {
+				out.WriteString("\\\\")
+			}
+			pendingSlash = !pendingSlash
+			flushSpace()
+			continue
+		}
+
+		if pendingSlash {
+			out.WriteByte('\\')
+			pendingSlash = false
+		}
+		flushSpace()
+		out.WriteRune(r)
+	}
+
+	// A lone backslash at the very end is preserved.
+	if pendingSlash {
+		out.WriteByte('\\')
+	}
+	return out.String()
+}
+
+var escapeRe = regexp.MustCompile(`[\t\n\f\r\\\x00]|^ | $|  `)
+
+// Escape escapes whitespace, backslashes and U+0000 within s.
+//
+// Tab, line feed, form feed, carriage return, backslash and NUL are replaced
+// by their backslash sequences. A space is escaped only where it would
+// otherwise be lost when the text is collapsed: at the start of s, at the end
+// of s, and as the second space of each pair of consecutive spaces.
+func Escape(s string) string {
+	return escapeRe.ReplaceAllStringFunc(s, func(m string) string {
+		// Every alternative of escapeRe matches at least one byte, and the
+		// first byte identifies the alternative.
+		switch m[0] {
+		case '\t':
+			return `\t`
+		case '\n':
+			return `\n`
+		case '\f':
+			return `\f`
+		case '\r':
+			return `\r`
+		case '\\':
+			return `\\`
+		case 0:
+			return `\x00`
+		case ' ':
+			if len(m) == 2 {
+				return ` \ `
+			}
+			return `\ `
+		}
+		return m // this shouldn't happen
+	})
+}
+
+var escapeSpaceRe = regexp.MustCompile(`[\t\n]|^ | $|  `)
+
+// EscapeSpace works like Escape, except that it only touches spaces, tabs and
+// line feeds; form feeds, carriage returns, backslashes and U+0000 are left
+// as they are.
+func EscapeSpace(s string) string {
+	return escapeSpaceRe.ReplaceAllStringFunc(s, func(m string) string {
+		// The first byte of a match identifies which alternative matched.
+		switch m[0] {
+		case '\t':
+			return `\t`
+		case '\n':
+			return `\n`
+		case ' ':
+			if len(m) == 2 {
+				return ` \ `
+			}
+			return `\ `
+		}
+		return m // this shouldn't happen
+	})
+}
+
+var escapeNonspaceRe = regexp.MustCompile(`[\f\r\\\x00]`)
+
+// EscapeNonspace works like Escape, except that spaces, tabs and line feeds
+// are left as they are; only form feeds, carriage returns, backslashes and
+// U+0000 are replaced by their backslash sequences.
+func EscapeNonspace(s string) string {
+	return escapeNonspaceRe.ReplaceAllStringFunc(s, func(m string) string {
+		// escapeNonspaceRe only ever matches a single byte.
+		switch m[0] {
+		case '\f':
+			return `\f`
+		case '\r':
+			return `\r`
+		case '\\':
+			return `\\`
+		case 0:
+			return `\x00`
+		}
+		return m // this shouldn't happen
+	})
+}
+
+var unescapeRe = regexp.MustCompile(`\\(?:[btnvfr \\]|x[0-9a-fA-F]{2}|u[0-9a-fA-F]{4}|U[0-9a-fA-F]{8})`)
+
+// Unescape resolves escape sequences in simple text.
+//
+// The recognized sequences are \b, \t, \n, \v, \f, \r, "\ " (space), \\ and
+// the hexadecimal forms \xXX, \uXXXX and \UXXXXXXXX. Each hexadecimal form
+// names a Unicode code point and is replaced by that code point's UTF-8
+// encoding; values that are not valid code points (surrogates, or anything
+// above U+10FFFF) become U+FFFD. Backslashes that do not start a recognized
+// sequence are left untouched.
+func Unescape(s string) string {
+	return unescapeRe.ReplaceAllStringFunc(s, func(match string) string {
+		switch match[1] {
+		case 'b':
+			return "\b"
+		case 't':
+			return "\t"
+		case 'n':
+			return "\n"
+		case 'v':
+			return "\v"
+		case 'f':
+			return "\f"
+		case 'r':
+			return "\r"
+		case ' ':
+			return " "
+		case '\\':
+			return "\\"
+		case 'x', 'u', 'U':
+			// unescapeRe guarantees 2, 4 or 8 hex digits, all of which fit
+			// in 32 bits, so the error branch is purely defensive.
+			n, err := strconv.ParseUint(match[2:], 16, 32)
+			if err != nil {
+				return match
+			}
+			// Convert through rune: a direct integer-to-string conversion
+			// is rejected by go vet (and therefore by go test). Values
+			// outside the valid code point range still yield U+FFFD.
+			return string(rune(n))
+		}
+		return match // this shouldn't happen
+	})
+}
+
+var splitRe = regexp.MustCompile(`((?:[^\t\n\f\r\\ ]|\\.?)+|^)`)
+
+// SplitUnescape splits the string s by whitespace, then unescapes simple text
+// escape sequences in every resulting field.
+//
+// A backslash together with the character after it always stays inside the
+// current field, so an escaped space does not split.
+//
+// NOTE(review): the `^` alternative in splitRe can match the empty string at
+// the start of s, so empty input or input starting with whitespace appears to
+// yield an empty first element; confirm that callers rely on this.
+func SplitUnescape(s string) []string {
+	fields := splitRe.FindAllString(s, -1)
+	for i, field := range fields {
+		fields[i] = Unescape(field)
+	}
+	return fields
+}
+
+// JoinEscape escapes every non-empty element of ss with Escape and joins the
+// results with single spaces. Empty strings in ss are skipped.
+func JoinEscape(ss []string) string {
+	escaped := make([]string, 0, len(ss))
+	for i := range ss {
+		if ss[i] == "" {
+			continue
+		}
+		escaped = append(escaped, Escape(ss[i]))
+	}
+	return strings.Join(escaped, " ")
+}
diff --git a/simpletext_test.go b/simpletext_test.go
new file mode 100644
index 0000000..8fdf754
--- /dev/null
+++ b/simpletext_test.go
@@ -0,0 +1,180 @@
+package cnm
+
+import "testing"
+
+// simpleEscapes maps raw input strings to the output expected from Escape.
+var simpleEscapes = map[string]string{
+	"":        ``,
+	"ContNet": `ContNet`,
+	"\t":      `\t`,
+	"\n":      `\n`,
+	"\f":      `\f`,
+	"\r":      `\r`,
+	" ":       `\ `,
+	"\\":      `\\`,
+	"\x00":    `\x00`,
+	"       ": `\  \  \  \ `,
+	"      ":  `\  \  \ \ `,
+	"     ":   `\  \  \ `,
+	"    ":    `\  \ \ `,
+	"   ":     `\  \ `,
+	"  ":      `\ \ `,
+	"\b\v\\\x00\xff\u00ff\\xff": "\b\v\\\\\\x00\xff\u00ff\\\\xff",
+}
+
+// TestEscape runs Escape over every entry of simpleEscapes as a subtest named
+// after the input.
+func TestEscape(t *testing.T) {
+	for input, want := range simpleEscapes {
+		t.Run(input, func(t *testing.T) {
+			if got := Escape(input); got != want {
+				t.Errorf("Escape(%q) -> %q, expected %q", input, got, want)
+			}
+		})
+	}
+}
+
+// nonspaceEscapes maps raw input strings to the output expected from
+// EscapeNonspace.
+var nonspaceEscapes = map[string]string{
+	"":        ``,
+	"ContNet": `ContNet`,
+	"\t":      "\t",
+	"\n":      "\n",
+	"\f":      `\f`,
+	"\r":      `\r`,
+	" ":       ` `,
+	"\\":      `\\`,
+	"\x00":    `\x00`,
+	"       ": `       `,
+	"      ":  `      `,
+	"     ":   `     `,
+	"    ":    `    `,
+	"   ":     `   `,
+	"  ":      `  `,
+	"\b\v\\\x00\xff\u00ff\\xff": "\b\v\\\\\\x00\xff\u00ff\\\\xff",
+}
+
+// TestEscapeNonspace runs EscapeNonspace over every entry of nonspaceEscapes
+// as a subtest named after the input.
+func TestEscapeNonspace(t *testing.T) {
+	for input, want := range nonspaceEscapes {
+		t.Run(input, func(t *testing.T) {
+			if got := EscapeNonspace(input); got != want {
+				t.Errorf("EscapeNonspace(%q) -> %q, expected %q", input, got, want)
+			}
+		})
+	}
+}
+
+// spaceEscapes maps raw input strings to the output expected from
+// EscapeSpace.
+var spaceEscapes = map[string]string{
+	"":        ``,
+	"ContNet": `ContNet`,
+	"\t":      `\t`,
+	"\n":      `\n`,
+	"\f":      "\f",
+	"\r":      "\r",
+	" ":       `\ `,
+	"\\":      `\`,
+	"\x00":    "\x00",
+	"       ": `\  \  \  \ `,
+	"      ":  `\  \  \ \ `,
+	"     ":   `\  \  \ `,
+	"    ":    `\  \ \ `,
+	"   ":     `\  \ `,
+	"  ":      `\ \ `,
+	"\b\v\\\x00\xff\u00ff\\xff": "\b\v\\\x00\xff\u00ff\\xff",
+}
+
+// TestEscapeSpace runs EscapeSpace over every entry of spaceEscapes as a
+// subtest named after the input.
+func TestEscapeSpace(t *testing.T) {
+	for input, want := range spaceEscapes {
+		t.Run(input, func(t *testing.T) {
+			if got := EscapeSpace(input); got != want {
+				t.Errorf("EscapeSpace(%q) -> %q, expected %q", input, got, want)
+			}
+		})
+	}
+}
+
+// simpleUnescapes maps escaped simple text to the output expected from
+// Unescape. It includes malformed and truncated escape sequences, which are
+// expected to pass through unchanged, and inputs containing raw bytes that
+// are not valid UTF-8.
+var simpleUnescapes = map[string]string{
+	``:                                         "",
+	`ContNet`:                                  "ContNet",
+	`\b`:                                       "\b",
+	`\t`:                                       "\t",
+	`\n`:                                       "\n",
+	`\v`:                                       "\v",
+	`\f`:                                       "\f",
+	`\r`:                                       "\r",
+	`\ `:                                       " ",
+	`\\`:                                       "\\",
+	`\`:                                        "\\",
+	`\x00`:                                     "\x00",
+	`a\nb\ c\rd\be\\f`:                         "a\nb c\rd\be\\f",
+	`\n\n\n`:                                   "\n\n\n",
+	`\x00\xff\n\x123`:                          "\x00\u00ff\n\x123",
+	"  \b\\b\t\n\v\f\r\\x00\x00\\\\xff":        "  \b\b\t\n\v\f\r\x00\x00\\xff",
+	`\xAA\xAa\xaA\xaa`:                         "\u00aa\u00aa\u00aa\u00aa",
+	`\x00\xfg`:                                 "\x00\\xfg",
+	`\\\\\\`:                                   "\\\\\\",
+	"\b5Ὂg̀9!\\n℃ᾭG":                           "\b5Ὂg̀9!\n℃ᾭG",
+	"\xff\\x00\xee\xaa\xee":                    "\xff\x00\xee\xaa\xee",
+	"\\x00\x10\\ \x30\x40":                     "\x00\x10\x20\x30\x40",
+	"\x10\x50\x90\xe0":                         "\x10\x50\x90\xe0",
+	`Hello,\ 世界`:                               "Hello, 世界",
+	"\xed\x9f\xbf":                             "\xed\x9f\xbf",
+	"\xee\x80\x80":                             "\xee\x80\x80",
+	"\xef\xbf\xbd":                             "\xef\xbf\xbd",
+	"\x80\x80\x80\x80":                         "\x80\x80\x80\x80",
+	`\ \  \  `:                                 "     ",
+	`\uffff\u0000\u0123\ufedc\ufffe`:           "\uffff\u0000\u0123\ufedc\ufffe",
+	`\Uffff0000\U0003fedc\U0010ffff\U00110000`: "\ufffd\U0003fedc\U0010ffff\ufffd",
+	`\x0x\u012x\U0123456x`:                     "\\x0x\\u012x\\U0123456x",
+	`\U0123456`:                                "\\U0123456",
+	`\u012`:                                    "\\u012",
+	`\x0`:                                      "\\x0",
+	`\x\u\U\a\z\0\-`:                           "\\x\\u\\U\\a\\z\\0\\-",
+}
+
+// TestUnescape runs Unescape over every entry of simpleUnescapes as a subtest
+// named after the input.
+func TestUnescape(t *testing.T) {
+	for input, want := range simpleUnescapes {
+		t.Run(input, func(t *testing.T) {
+			if got := Unescape(input); got != want {
+				t.Errorf("Unescape(%q) -> %q, expected %q", input, got, want)
+			}
+		})
+	}
+}
+
+// simpleTexts maps raw simple text to the output expected from
+// ParseSimpleText, i.e. after whitespace collapsing and unescaping.
+var simpleTexts = map[string]string{
+	"foo":                         "foo",
+	"\n":                          "",
+	"\n\r \t\v\f":                 "\v",
+	" ":                           "",
+	`\ `:                          " ",
+	` \ `:                         " ",
+	`\  `:                         " ",
+	`\  \ `:                       "   ",
+	` \`:                          "\\",
+	`\`:                           "\\",
+	`    \   `:                    " ",
+	`     `:                       "",
+	`\  \  `:                      "   ",
+	` \  `:                        " ",
+	"  qwe  asd  ":                "qwe asd",
+	"\\  qwe\nasd\n\nzxc\\n123\n": "  qwe asd zxc\n123",
+	`\   \   \ \ \ `:              "       ",
+	` \ \   \   `:                 "    ",
+	` \\ `:                        "\\",
+	`\ \\ `:                       " \\",
+	` \\\ `:                       "\\ ",
+	` \ \\\  `:                    " \\ ",
+	`\  \\ \ `:                    "  \\  ",
+}
+
+// TestParseSimpleText runs ParseSimpleText over every entry of simpleTexts as
+// a subtest named after the input.
+func TestParseSimpleText(t *testing.T) {
+	for input, want := range simpleTexts {
+		t.Run(input, func(t *testing.T) {
+			if got := ParseSimpleText(input); got != want {
+				t.Errorf("ParseSimpleText(%q) -> %q, expected %q", input, got, want)
+			}
+		})
+	}
+}
diff --git a/token.go b/token.go
new file mode 100644
index 0000000..a6b08b9
--- /dev/null
+++ b/token.go
@@ -0,0 +1,58 @@
+package cnm
+
+// Token represents a parsed line in a CNM document.
+//
+// The method descriptions below follow the TokenLine implementation.
+type Token interface {
+	// Indent returns the indentation of the parsed line.
+	Indent() int
+	// Raw returns the original unparsed line.
+	Raw() string
+	// Line returns the line number in the document, starting from 1.
+	Line() int
+}
+
+// TokenLine represents an arbitrary CNM line.
+//
+// *TokenLine provides the Indent, Raw and Line methods declared by Token.
+type TokenLine struct {
+	// Indentation is the indentation of the line, as returned by Indent.
+	Indentation int
+	// RawLine is the original unparsed line, as returned by Raw.
+	RawLine string
+	// LineNo is the line number in the document, as returned by Line.
+	LineNo int
+}
+
+// Indent returns the indentation of the parsed line.
+func (t *TokenLine) Indent() int { return t.Indentation }
+
+// Raw returns the original unparsed line.
+func (t *TokenLine) Raw() string { return t.RawLine }
+
+// Line returns the line number in the document, starting from 1.
+func (t *TokenLine) Line() int { return t.LineNo }
+
+// TokenEmptyLine represents an empty line.
+//
+// A line is empty as long as it contains up to as many tab characters as the
+// line's indentation and nothing else.
+type TokenEmptyLine struct {
+	// TokenLine holds the indentation, raw text and line number of the line.
+	TokenLine
+}
+
+// TokenBlock represents a block header line.
+type TokenBlock struct {
+	// TokenLine holds the indentation, raw text and line number of the line.
+	TokenLine
+	// Parent is the parent block.
+	// NOTE(review): presumably nil for a top-level block; confirm against
+	// the parser.
+	Parent *TokenBlock
+	// Name is the block name.
+	Name string
+	// Args are the block arguments, split by whitespace and then parsed as
+	// simple text.
+	Args []string
+}
+
+// TokenSimpleText represents a line of simple text.
+type TokenSimpleText struct {
+	// TokenLine holds the indentation, raw text and line number of the line.
+	TokenLine
+	// Text is the line contents parsed as simple text.
+	Text string
+}
+
+// TokenRawText represents a non-empty line with unparsed contents.
+type TokenRawText struct {
+	// TokenLine holds the indentation, raw text and line number of the line.
+	TokenLine
+	// Text is the raw contents of the line with the indentation removed.
+	Text string
+}
diff --git a/write_test.go b/write_test.go
new file mode 100644
index 0000000..fc13459
--- /dev/null
+++ b/write_test.go
@@ -0,0 +1,218 @@
+package cnm
+
+import (
+	"bytes"
+	"testing"
+)
+
+// writeTests maps the expected serialized CNM text to the Document whose
+// Write output must be exactly that text (see TestWrite).
+//
+// The raw string literal in the last entry contains significant tabs and
+// trailing spaces; they must be preserved byte for byte.
+var writeTests = map[string]*Document{
+	"": &Document{},
+
+	"title\n\tfoo bar\n": &Document{
+		Title: "foo bar",
+	},
+
+	"title\n\tfoo bar baz\n" +
+		"links\n\tqwe asd\n\tzxc 123\n\tfoo\n" +
+		"site\n\tfoo\n\t\tbar\n\t\tbaz/quux\n\t\t\t123\n\ttest\n": &Document{
+		Title: "foo bar baz",
+		Links: []Link{
+			Link{"qwe", "asd", ""},
+			Link{"zxc", "123", ""},
+			Link{"foo", "", ""},
+		},
+		Site: Site{
+			Children: []Site{
+				Site{
+					Path: "foo",
+					Children: []Site{
+						Site{Path: "bar"},
+						Site{
+							Path: "baz/quux",
+							Children: []Site{
+								Site{Path: "123"},
+							},
+						},
+					},
+				},
+				Site{Path: "test"},
+			},
+		},
+	},
+
+	`title
+	Test document
+content
+	section Test section
+		text
+			This is \n just a
+		text pre
+			  t  e 
+			 s  t  
+				
+		raw text/plain
+			of various \n features
+		section of the
+			table
+				header
+					text
+						Column 1
+					text
+						Column 2
+				row
+					text
+						CNM
+					text
+						document
+
+						format
+				row
+					section
+					list
+						text
+							ipsum
+						list ordered
+							list unordered
+								text
+									dolor
+
+									sit amet
+	embed text/cnm cnp://example.com/
+		thing whatever
+`: &Document{
+		Title: "Test document",
+		Content: &ContentBlock{
+			name: "content",
+			args: nil,
+			children: []Block{
+				&SectionBlock{ContentBlock{
+					name: "section",
+					args: []string{"Test", "section"},
+					children: []Block{
+						&TextBlock{
+							Contents: TextPlainContents{[]string{
+								"This is \n just a",
+							}},
+						},
+						&TextBlock{
+							Format: "pre",
+							Contents: TextPreContents{
+								"  t  e \n s  t  \n\t",
+							},
+						},
+						&RawBlock{
+							Syntax:   "text/plain",
+							Contents: "of various \\n features",
+						},
+						&SectionBlock{ContentBlock{
+							name: "section",
+							args: []string{"of the"},
+							children: []Block{
+								&TableBlock{[]Block{
+									&HeaderBlock{ContentBlock{
+										name: "header",
+										args: []string{},
+										children: []Block{
+											&TextBlock{
+												Contents: TextPlainContents{[]string{
+													"Column 1",
+												}},
+											},
+											&TextBlock{
+												Contents: TextPlainContents{[]string{
+													"Column 2",
+												}},
+											},
+										},
+									}},
+									&RowBlock{ContentBlock{
+										name: "row",
+										args: []string{},
+										children: []Block{
+											&TextBlock{
+												Contents: TextPlainContents{[]string{
+													"CNM",
+												}},
+											},
+											&TextBlock{
+												Contents: TextPlainContents{[]string{
+													"document",
+													"format",
+												}},
+											},
+										},
+									}},
+									&RowBlock{ContentBlock{
+										name: "row",
+										args: []string{""},
+										children: []Block{
+											&SectionBlock{ContentBlock{
+												name: "section",
+												args: []string{"", "", ""},
+											}},
+											&ListBlock{ContentBlock{
+												name: "list",
+												args: nil,
+												children: []Block{
+													&TextBlock{
+														Contents: TextPlainContents{[]string{
+															"ipsum",
+														}},
+													},
+													&ListBlock{ContentBlock{
+														name: "list",
+														args: []string{"ordered"},
+														children: []Block{
+															&ListBlock{ContentBlock{
+																name: "list",
+																args: []string{"unordered"},
+																children: []Block{
+																	&TextBlock{
+																		Contents: TextPlainContents{[]string{
+																			"dolor",
+																			"sit amet",
+																		}},
+																	},
+																},
+															}},
+														},
+													}},
+												},
+											}},
+										},
+									}},
+								}},
+							},
+						}},
+					},
+				}},
+				&EmbedBlock{
+					Type:        "text/cnm",
+					URL:         "cnp://example.com/",
+					Description: "thing whatever",
+				},
+			},
+		},
+	},
+}
+
+// TestWrite serializes every Document in writeTests with Write and requires
+// the output to equal the map key exactly. Both texts are logged so that a
+// mismatch can be inspected.
+func TestWrite(t *testing.T) {
+	for expected, doc := range writeTests {
+		t.Run(expected, func(t *testing.T) {
+			var out bytes.Buffer
+			if err := doc.Write(&out); err != nil {
+				t.Fatalf("Write error: %v", err)
+			}
+			got := out.String()
+
+			t.Log("====================")
+			t.Log("expected:\n" + expected)
+			t.Log("--------------------")
+			t.Log("     got:\n" + got)
+			t.Log("====================")
+
+			if got != expected {
+				t.Fatal("Write: output did not match expected document")
+			}
+		})
+	}
+}
author	clsr <clsr@clsr.net>	2017-08-18 13:45:49 +0200
committer	clsr <clsr@clsr.net>	2017-08-18 13:45:49 +0200
commit	26248678aafc2f8e277d4bdafc116f2b349b02c5 (patch)
tree	15f82488edb8c05aae756443284731875f36737c
download	cnm-go-26248678aafc2f8e277d4bdafc116f2b349b02c5.tar.gz cnm-go-26248678aafc2f8e277d4bdafc116f2b349b02c5.zip