diff options
| -rw-r--r-- | cnmfmt/cnmfmt.go | 525 | ||||
| -rw-r--r-- | cnmfmt/cnmfmt_test.go | 457 | ||||
| -rw-r--r-- | content.go | 610 | ||||
| -rw-r--r-- | document.go | 278 | ||||
| -rw-r--r-- | parse.go | 189 | ||||
| -rw-r--r-- | parse_test.go | 701 | ||||
| -rw-r--r-- | simpletext.go | 196 | ||||
| -rw-r--r-- | simpletext_test.go | 180 | ||||
| -rw-r--r-- | token.go | 58 | ||||
| -rw-r--r-- | write_test.go | 218 | 
10 files changed, 3412 insertions, 0 deletions
| diff --git a/cnmfmt/cnmfmt.go b/cnmfmt/cnmfmt.go new file mode 100644 index 0000000..cb8dc64 --- /dev/null +++ b/cnmfmt/cnmfmt.go @@ -0,0 +1,525 @@ +// Package cnmfmt provides parsing and composition for CNMfmt formatting. +package cnmfmt // import "contnet.org/lib/cnm-go/cnmfmt" + +import ( +	"bytes" +	"io" +	"strings" + +	"contnet.org/lib/cnm-go" +) + +func init() { +	cnm.RegisterTextContentParser("fmt", parseTextFmt) +} + +// Text represents a paragraph of CNMfmt text. +type Text struct { +	// Spans are spans of formatted text. +	Spans []Span +} + +// ParseParagraph parses a single CNMfmt text paragraph s. +func ParseParagraph(s string) Text { +	s = cnm.CollapseWhitespace(s) + +	t := Text{} +	var buf bytes.Buffer +	format := Format{} +	last := rune(-1) +	url := false + +	for _, r := range s { +		if url && format.Link == "" { // need URL for link +			if handleURL(r, &last, &format, &buf) { +				continue +			} +		} + +		switch r { +		case '*', '/', '_', '`', '@': +			handleTag(r, &last, &t, &format, &buf, &url) + +		case '\\': +			if last == '\\' { +				buf.WriteString("\\\\") +				last = -1 +			} else { +				if last >= 0 { +					buf.WriteRune(last) +				} +				last = '\\' +			} + +		default: +			if last >= 0 { +				buf.WriteRune(last) +			} +			buf.WriteRune(r) +			last = -1 +		} +	} + +	if url && format.Link == "" { +		if last >= 0 { +			buf.WriteRune(last) +		} +		format.Link = Unescape(buf.String()) +		buf.Reset() +	} else if last >= 0 { +		buf.WriteRune(last) +	} +	last = -1 +	handleTag(-1, &last, &t, &format, &buf, &url) + +	t.trimUnescape() + +	return t +} + +func (t *Text) trimUnescape() { +	var spans []Span + +	for _, span := range t.Spans { +		if span.Text != "" || span.Format.Link != "" { +			spans = append(spans, span) +		} +	} +	t.Spans, spans = spans, nil + +	for i := len(t.Spans) - 1; i >= 0; i-- { +		span := t.Spans[i] +		if span.Text != "" || span.Format.Link != "" { +			spans = append(spans, span) +		} +	} +	for i := 0; i < len(spans)/2; i++ { 
+		spans[i], spans[len(spans)-1-i] = spans[len(spans)-1-i], spans[i] +	} +	t.Spans = spans + +	for i := range t.Spans { +		t.Spans[i].Text = Unescape(t.Spans[i].Text) +	} +} + +func (t *Text) appendSpan(format Format, txt string) { +	if txt != "" || format.Link != "" { +		t.Spans = append(t.Spans, Span{format, txt}) +	} +} + +func handleURL(r rune, last *rune, format *Format, buf *bytes.Buffer) bool { +	if r == '@' && *last == '@' { // end without text +		format.Link = Unescape(buf.String()) +		buf.Reset() +		return false +	} else if *last == '\\' { +		buf.WriteByte('\\') +		buf.WriteRune(r) +		*last = -1 +	} else if r == '\\' || r == '@' { +		*last = r +	} else if r != ' ' { // url +		buf.WriteRune(r) +	} else if buf.Len() > 0 { // space, then text +		format.Link = Unescape(buf.String()) +		buf.Reset() +	} // else: prefix space +	return true +} + +func handleTag(r rune, last *rune, txt *Text, format *Format, buf *bytes.Buffer, url *bool) { +	if *last == '\\' { +		buf.WriteRune(r) +		*last = -1 +	} else if *last == r { +		txt.appendSpan(*format, buf.String()) +		buf.Reset() +		switch r { +		case '*': +			format.Bold = !format.Bold +		case '/': +			format.Italic = !format.Italic +		case '_': +			format.Underline = !format.Underline +		case '`': +			format.Monospace = !format.Monospace +		case '@': +			format.Link = "" +			*url = !*url +		} +		*last = -1 +	} else { +		switch *last { +		case '*', '/', '_', '`', '@': +			buf.WriteRune(*last) +		} +		*last = r +	} +} + +// WriteIndent writes the formatted text indented by n tabs. 
+func (t Text) WriteIndent(w io.Writer, n int) error { +	var state [5]byte // bold, italic, underline, monospace, link +	si := 0 +	format := Format{} +	spans := EscapeSpans(t.Spans) +	var line []string +	for _, span := range spans { +		order := tagOrder(state[:si], format, span.Format) +		for _, f := range order { +			switch f { +			case '*': +				format.Bold = !format.Bold +				line = append(line, "**") +			case '/': +				format.Italic = !format.Italic +				line = append(line, "//") +			case '_': +				format.Underline = !format.Underline +				line = append(line, "__") +			case '`': +				format.Monospace = !format.Monospace +				line = append(line, "``") +			case '@': +				if format.Link != "" { +					line = append(line, "@@") +				} +				if span.Format.Link != "" { +					pad := "" +					if span.Text != "" { +						pad = " " +					} +					line = append(line, "@@", cnm.Escape(span.Format.Link), pad) +				} +			} +		} +		line = append(line, span.Text) +		si = cleanupTags(state[:], order, span.Format) +		format = span.Format +	} +	return writeIndent(w, strings.Join(line, ""), n) +} + +func tagOrder(state []byte, old, new Format) []byte { +	ldiff := "" +	if old.Link != new.Link { +		ldiff = "1" +	} +	diff := Format{ +		Bold:      old.Bold != new.Bold, +		Italic:    old.Italic != new.Italic, +		Underline: old.Underline != new.Underline, +		Monospace: old.Monospace != new.Monospace, +		Link:      ldiff, +	} + +	var order [5]byte +	oi := 0 +	for i := len(state) - 1; i >= 0; i-- { +		switch state[i] { +		case '*': +			if diff.Bold { +				order[oi] = '*' +				oi++ +				diff.Bold = false +			} +		case '/': +			if diff.Italic { +				order[oi] = '/' +				oi++ +				diff.Italic = false +			} +		case '_': +			if diff.Underline { +				order[oi] = '_' +				oi++ +				diff.Underline = false +			} +		case '`': +			if diff.Monospace { +				order[oi] = '`' +				oi++ +				diff.Monospace = false +			} +		case '@': +			if diff.Link != "" { +				order[oi] = '@' +				oi++ +				diff.Link = "" 
+			} +		} +	} + +	if diff.Bold { +		order[oi] = '*' +		oi++ +	} +	if diff.Italic { +		order[oi] = '/' +		oi++ +	} +	if diff.Underline { +		order[oi] = '_' +		oi++ +	} +	if diff.Monospace { +		order[oi] = '`' +		oi++ +	} +	if diff.Link != "" { +		order[oi] = '@' +		oi++ +	} + +	return order[:oi] +} + +func cleanupTags(state []byte, order []byte, format Format) int { +	var newState [10]byte +	copy(newState[:5], state) +	copy(newState[5:], order) +	for i := range newState { +		switch newState[i] { +		case '*': +			if !format.Bold { +				newState[i] = 0 +			} +		case '/': +			if !format.Italic { +				newState[i] = 0 +			} +		case '_': +			if !format.Underline { +				newState[i] = 0 +			} +		case '`': +			if !format.Monospace { +				newState[i] = 0 +			} +		case '@': +			if format.Link == "" { +				newState[i] = 0 +			} +		} +	} +	si := 0 +	for _, f := range newState { +		if f > 0 { +			state[si] = f +			si++ +		} +	} +	return si +} + +// Span represents a span of text with a format. +type Span struct { +	// Format is the format of the text. +	Format Format + +	// Text is the text content of the span. +	Text string +} + +// Format represents a state of CNMfmt formatting. +type Format struct { +	// Bold text. +	Bold bool + +	// Italic text. +	Italic bool + +	// Underlined text. +	Underline bool + +	// Monospaced text. +	Monospace bool + +	// Hyperlink URL (if non-empty). +	Link string +} + +// Escape escapes CNMfmt and CNM text special characters. +func Escape(s string) string { +	return EscapeFmt(cnm.Escape(s)) +} + +// EscapeSpans escapes CNMfmt and CNM text within spans. +// +// This function will not needlessly escape spaces at the start or end of a +// span if the sibling span contains nonspaces. 
+func EscapeSpans(spans []Span) []Span { +	// XXX: this is an ugly solution +	esc := make([]Span, len(spans)) +	for i := range spans { +		start := false +		end := false +		span := spans[i] +		if i+1 < len(spans) { +			s := spans[i+1].Text +			if len(s) > 0 && s[0] != ' ' { +				span.Text = span.Text + "x" +				end = true +			} +		} +		if i > 0 { +			s := spans[i-1].Text +			if len(s) > 0 && s[len(s)-1] != ' ' { +				span.Text = "x" + span.Text +				start = true +			} +		} +		span.Text = Escape(span.Text) +		if start { +			span.Text = span.Text[1:] +		} +		if end { +			span.Text = span.Text[:len(span.Text)-1] +		} +		esc[i] = span +	} +	return esc +} + +var escapeReplacer = strings.NewReplacer( +	`*`, `\*`, +	`/`, `\/`, +	`_`, `\_`, +	"`", "\\`", +	`@`, `\@`, +) + +// EscapeFmt escapes only CNMfmt format toggle characters. +func EscapeFmt(s string) string { +	return escapeReplacer.Replace(s) +} + +// Unescape resolves CNM text and CNMfmt escape sequences in s. +func Unescape(s string) string { +	return cnm.Unescape(UnescapeFmt(s)) +} + +var unescapeReplacer = strings.NewReplacer( +	`\\`, `\\`, +	`\*`, `*`, +	`\/`, `/`, +	`\_`, `_`, +	"\\`", "`", +	`\@`, `@`, +) + +// UnescapeFmt resolves only CNMfmt escape sequences in s. +func UnescapeFmt(s string) string { +	return unescapeReplacer.Replace(s) +} + +// TextFmtContents represents CNM `text fmt` contents. +type TextFmtContents struct { +	Paragraphs []Text +} + +// NewTextFmtBlock creates a new `text fmt` block containing provided CNMfmt +// paragraphs. +func NewTextFmtBlock(paragraphs []Text) *cnm.TextBlock { +	return cnm.NewTextBlock("fmt", TextFmtContents{paragraphs}) +} + +// WriteIndent writes the formatted text contents indented by n tabs. 
+func (tf TextFmtContents) WriteIndent(w io.Writer, n int) error { +	for i, p := range tf.Paragraphs { +		if i != 0 { +			if err := writeIndent(w, "", 0); err != nil { +				return err +			} +		} +		if err := p.WriteIndent(w, n); err != nil { +			return err +		} +	} +	return nil +} + +// Parse parses paragraphs of CNMfmt text. +func Parse(paragraphs string) []Text { +	var txt []Text +	var paragraph []string + +	for _, line := range strings.Split(paragraphs, "\n") { +		end := false +		if line != "" { +			if strings.Trim(line, "\n\r\t\f ") == "" { +				end = true +			} else { +				paragraph = append(paragraph, line) +			} +		} else if len(paragraph) > 0 { +			end = true +		} +		if end { +			txt = append(txt, ParseParagraph(strings.Join(paragraph, " "))) +			paragraph = nil +		} +	} +	if len(paragraph) > 0 { +		txt = append(txt, ParseParagraph(strings.Join(paragraph, " "))) +	} + +	return txt +} + +func writeIndent(w io.Writer, s string, depth int) error { +	const tabs = "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t" + +	if s == "" { +		_, err := w.Write([]byte{'\n'}) +		return err +	} +	if depth == 0 { +		_, err := w.Write([]byte(s + "\n")) +		return err +	} + +	var ind string +	if depth <= len(tabs) { +		ind = tabs[:depth] +	} else { +		ind = strings.Repeat("\t", depth) +	} +	_, err := w.Write([]byte(ind + s + "\n")) +	return err + +} + +func parseTextFmt(p *cnm.Parser, block *cnm.TokenBlock) (cnm.TextContents, error) { +	txt := TextFmtContents{} +	var paragraph []string +	var err error +	for err == nil { +		if !p.Empty() && p.Indent() <= block.Indent() { +			break +		} + +		token := p.RawText() +		end := false +		if text, ok := token.(*cnm.TokenRawText); ok { +			if strings.Trim(text.Text, "\n\r\t\f ") == "" { +				end = true +			} else { +				paragraph = append(paragraph, text.Text) +			} +		} else if _, ok := token.(*cnm.TokenEmptyLine); ok && len(paragraph) > 0 { +			end = true +		} +		if end { +			txt.Paragraphs = append(txt.Paragraphs, 
ParseParagraph(strings.Join(paragraph, " "))) +			paragraph = nil +		} +		err = p.Next() +	} +	if len(paragraph) > 0 { +		txt.Paragraphs = append(txt.Paragraphs, ParseParagraph(strings.Join(paragraph, " "))) +	} +	return txt, err +} diff --git a/cnmfmt/cnmfmt_test.go b/cnmfmt/cnmfmt_test.go new file mode 100644 index 0000000..89a40a9 --- /dev/null +++ b/cnmfmt/cnmfmt_test.go @@ -0,0 +1,457 @@ +package cnmfmt + +import ( +	"bytes" +	"io" +	"strings" +	"testing" + +	"contnet.org/lib/cnm-go" +) + +var parseTests = map[string]Text{ +	"\\nfoo\nbar\\": Text{[]Span{ +		Span{Format{}, "\nfoo bar\\"}, +	}}, +	"**foo": Text{[]Span{ +		Span{Format{Bold: true}, "foo"}, +	}}, +	"//foo": Text{[]Span{ +		Span{Format{Italic: true}, "foo"}, +	}}, +	"__foo": Text{[]Span{ +		Span{Format{Underline: true}, "foo"}, +	}}, +	"``foo": Text{[]Span{ +		Span{Format{Monospace: true}, "foo"}, +	}}, +	"foo*bar": Text{[]Span{ +		Span{Format{}, "foo*bar"}, +	}}, +	"foo*": Text{[]Span{ +		Span{Format{}, "foo*"}, +	}}, +	"foo**": Text{[]Span{ +		Span{Format{}, "foo"}, +	}}, +	"foo***": Text{[]Span{ +		Span{Format{}, "foo"}, +		Span{Format{Bold: true}, "*"}, +	}}, +	"foo****": Text{[]Span{ +		Span{Format{}, "foo"}, +	}}, +	"*foo": Text{[]Span{ +		Span{Format{}, "*foo"}, +	}}, +	"****foo": Text{[]Span{ +		Span{Format{}, "foo"}, +	}}, +	"******foo": Text{[]Span{ +		Span{Format{Bold: true}, "foo"}, +	}}, +	"foo ** bar": Text{[]Span{ +		Span{Format{}, "foo "}, +		Span{Format{Bold: true}, " bar"}, +	}}, +	"foo** bar": Text{[]Span{ +		Span{Format{}, "foo"}, +		Span{Format{Bold: true}, " bar"}, +	}}, +	"foo **bar": Text{[]Span{ +		Span{Format{}, "foo "}, +		Span{Format{Bold: true}, "bar"}, +	}}, +	"foo ** bar ** baz": Text{[]Span{ +		Span{Format{}, "foo "}, +		Span{Format{Bold: true}, " bar "}, +		Span{Format{}, " baz"}, +	}}, +	"foo ** bar** baz": Text{[]Span{ +		Span{Format{}, "foo "}, +		Span{Format{Bold: true}, " bar"}, +		Span{Format{}, " baz"}, +	}}, +	"**__**foo": Text{[]Span{ +		
Span{Format{Underline: true}, "foo"}, +	}}, +	"***": Text{[]Span{ +		Span{Format{Bold: true}, "*"}, +	}}, +	"*\\**": Text{[]Span{ +		Span{Format{}, "***"}, +	}}, +	"\\*": Text{[]Span{ +		Span{Format{}, "*"}, +	}}, +	"\\*\\*": Text{[]Span{ +		Span{Format{}, "**"}, +	}}, +	"\\**": Text{[]Span{ +		Span{Format{}, "**"}, +	}}, +	"*\\*": Text{[]Span{ +		Span{Format{}, "**"}, +	}}, +	"\\": Text{[]Span{ +		Span{Format{}, "\\"}, +	}}, +	"\\\\": Text{[]Span{ +		Span{Format{}, "\\"}, +	}}, +	" ** // `` ": Text{[]Span{ +		Span{Format{Bold: true}, " "}, +		Span{Format{Bold: true, Italic: true}, " "}, +	}}, +	"**": Text{[]Span{}}, +	"**``__//foo": Text{[]Span{ +		Span{Format{Bold: true, Monospace: true, Underline: true, Italic: true}, "foo"}, +	}}, +	"**foo//bar**baz": Text{[]Span{ +		Span{Format{Bold: true}, "foo"}, +		Span{Format{Bold: true, Italic: true}, "bar"}, +		Span{Format{Italic: true}, "baz"}, +	}}, +	"@@foo": Text{[]Span{ +		Span{Format{Link: "foo"}, ""}, +	}}, +	"@@foo@@": Text{[]Span{ +		Span{Format{Link: "foo"}, ""}, +	}}, +	"@@foo bar@@": Text{[]Span{ +		Span{Format{Link: "foo"}, "bar"}, +	}}, +	"@@  foo": Text{[]Span{ +		Span{Format{Link: "foo"}, ""}, +	}}, +	"@@foo  ": Text{[]Span{ +		Span{Format{Link: "foo"}, ""}, +	}}, +	"@@foo\\": Text{[]Span{ +		Span{Format{Link: "foo\\"}, ""}, +	}}, +	"@@foo \\": Text{[]Span{ +		Span{Format{Link: "foo"}, "\\"}, +	}}, +	"@@foo \\\\": Text{[]Span{ +		Span{Format{Link: "foo"}, "\\"}, +	}}, +	"@@foo@": Text{[]Span{ +		Span{Format{Link: "foo@"}, ""}, +	}}, +	"@@foo\\@@": Text{[]Span{ +		Span{Format{Link: "foo@@"}, ""}, +	}}, +	"@@f\\\\o\\o\\n @": Text{[]Span{ +		Span{Format{Link: "f\\o\\o\n"}, "@"}, +	}}, +	"@@http://example.com foo **bar @@baz**": Text{[]Span{ +		Span{Format{Link: "http://example.com"}, "foo "}, +		Span{Format{Bold: true, Link: "http://example.com"}, "bar "}, +		Span{Format{Bold: true}, "baz"}, +	}}, +	"//@@http://example.com foo //bar @@": Text{[]Span{ +		Span{Format{Italic: true, Link: "http://example.com"}, 
"foo "}, +		Span{Format{Link: "http://example.com"}, "bar "}, +	}}, +	"__\\  asd \\ zxc\\ ": Text{[]Span{ +		Span{Format{Underline: true, Monospace: false}, "  asd  zxc "}, +	}}, +	"@@/ test/@@": Text{[]Span{ +		Span{Format{Link: "/"}, "test/"}, +	}}, +	"@@/ /test@@": Text{[]Span{ +		Span{Format{Link: "/"}, "/test"}, +	}}, +	"/": Text{[]Span{ +		Span{Format{}, "/"}, +	}}, +	"test/**": Text{[]Span{ +		Span{Format{}, "test/"}, +	}}, +	"//test/": Text{[]Span{ +		Span{Format{Italic: true}, "test/"}, +	}}, +	"/**test": Text{[]Span{ +		Span{Format{}, "/"}, +		Span{Format{Bold: true}, "test"}, +	}}, +} + +func TestParseParagraph(t *testing.T) { +	for k, v := range parseTests { +		t.Run(k, func(t *testing.T) { +			txt := ParseParagraph(k) +			if !textEqual(txt, v) { +				t.Errorf("ParseParagraph(%q):\nexpected: %#v\n     got: %#v", k, v, txt) +			} +		}) +	} +} + +func TestParse(t *testing.T) { +	for k, v := range parseTests { +		t.Run(k, func(t *testing.T) { +			txts := Parse(k) +			if len(txts) != 1 || !textEqual(txts[0], v) { +				t.Errorf("Parse(%q):\nexpected: %#v\n     got: %#v", k, []Text{v}, txts) +			} +		}) +	} +} + +func textEqual(a, b Text) bool { +	if len(a.Spans) != len(b.Spans) { +		return false +	} +	for i := range a.Spans { +		if a.Spans[i] != b.Spans[i] { +			return false +		} +	} +	return true +} + +var escapeTests = map[string]string{ +	"\n\r\t\v\x00":     "\\n\\r\\t\v\\x00", +	"@@!!##__//__``**": "\\@\\@!!##\\_\\_\\/\\/\\_\\_\\`\\`\\*\\*", +	`foo\@\@bar`:       `foo\\\@\\\@bar`, +} + +func TestEscape(t *testing.T) { +	for k, v := range escapeTests { +		t.Run(k, func(t *testing.T) { +			if e := Escape(k); e != v { +				t.Errorf("Escape(%q): expected %q, got %q", k, v, e) +			} +		}) +	} +} + +var parseTextTests = map[string]TextFmtContents{ +	"foo  ** bar\nbaz\n\n\nquux ** ": TextFmtContents{[]Text{ +		Text{[]Span{ +			Span{Format{}, "foo "}, +			Span{Format{Bold: true}, " bar baz"}, +		}}, +		Text{[]Span{ +			Span{Format{}, "quux "}, +		}}, +	}}, + +	
"\n": TextFmtContents{}, + +	"foo": TextFmtContents{[]Text{ +		Text{[]Span{ +			Span{Format{}, "foo"}, +		}}, +	}}, + +	"\n\n": TextFmtContents{}, + +	"foo\n\t\t\t\t\nbar": TextFmtContents{[]Text{ +		Text{[]Span{Span{Format{}, "foo"}}}, +		Text{[]Span{Span{Format{}, "bar"}}}, +	}}, + +	"foo\n\t\t   \f\r\t\nbar": TextFmtContents{[]Text{ +		Text{[]Span{Span{Format{}, "foo"}}}, +		Text{[]Span{Span{Format{}, "bar"}}}, +	}}, + +	`foo**bar\*\*baz\*\*quux**qweasd`: TextFmtContents{[]Text{Text{[]Span{ +		Span{Format{}, "foo"}, +		Span{Format{Bold: true}, "bar**baz**quux"}, +		Span{Format{}, "qweasd"}, +	}}}}, +} + +func TestParseTextFmt(t *testing.T) { +	for k, v := range parseTextTests { +		t.Run(k, func(t *testing.T) { +			parser := cnm.NewParser(strings.NewReader(k)) +			err := parser.Next() +			if err != nil && err != io.EOF { +				t.Fatalf("error parsing %q: %v", k, err) +			} +			content, err := parseTextFmt(parser, cnm.TopLevel) +			if err != nil && err != io.EOF { +				t.Fatalf("error parsing %q: %v", k, err) +			} +			tf, ok := content.(TextFmtContents) +			if !ok { +				t.Fatalf("%q: expected type %T, got %T", k, v, content) +			} +			if !paragraphsEqual(v.Paragraphs, tf.Paragraphs) { +				t.Fatalf("%q:\nexpected: %#v\n     got: %#v", k, v, tf) +			} +			txts := Parse(k) +			if !paragraphsEqual(txts, v.Paragraphs) { +				t.Fatalf("%q:\nexpected: %#v\n     got: %#v", k, v.Paragraphs, txts) +			} +		}) +	} +} + +func paragraphsEqual(a, b []Text) bool { +	if len(a) != len(b) { +		return false +	} +	for i := range a { +		if !textEqual(a[i], b[i]) { +			return false +		} +	} +	return true +} + +var writeTests = map[string]TextFmtContents{ +	"": TextFmtContents{}, + +	"foo\n": TextFmtContents{[]Text{ +		Text{[]Span{ +			Span{Format{}, "foo"}, +		}}, +	}}, + +	"**foo\n": TextFmtContents{[]Text{ +		Text{[]Span{ +			Span{Format{Bold: true}, "foo"}, +		}}, +	}}, + +	"foo **bar baz\n\nquux\n": TextFmtContents{[]Text{ +		Text{[]Span{ +			Span{Format{}, "foo "}, +			
Span{Format{Bold: true}, "bar baz"}, +		}}, +		Text{[]Span{ +			Span{Format{}, "quux"}, +		}}, +	}}, + +	"foo**bar``baz**quux\n\n" + +		"\\ __qwe\\ __//\\  asd \\ //``zxc``**\\ \n\n" + +		"//@@http://example.com exa//mple@@ @@href text@@// test\n": TextFmtContents{[]Text{ +		Text{[]Span{ +			Span{Format{}, "foo"}, +			Span{Format{Bold: true}, "bar"}, +			Span{Format{Bold: true, Monospace: true}, "baz"}, +			Span{Format{Monospace: true}, "quux"}, +		}}, +		Text{[]Span{ +			Span{Format{}, " "}, +			Span{Format{Underline: true}, "qwe "}, +			Span{Format{Italic: true}, "  asd  "}, +			Span{Format{Monospace: true}, "zxc"}, +			Span{Format{Bold: true}, " "}, +		}}, +		Text{[]Span{ +			Span{Format{Italic: true, Link: "http://example.com"}, "exa"}, +			Span{Format{Link: "http://example.com"}, "mple"}, +			Span{Format{}, " "}, +			Span{Format{Link: "href"}, "text"}, +			Span{Format{Italic: true}, " test"}, +		}}, +	}}, + +	"foo**bar\\*\\*baz\\*\\*quux**qweasd\n": TextFmtContents{[]Text{Text{[]Span{ +		Span{Format{}, "foo"}, +		Span{Format{Bold: true}, "bar**baz**quux"}, +		Span{Format{}, "qweasd"}, +	}}}}, +} + +func TestWriteTextFmt(t *testing.T) { +	for k, v := range writeTests { +		t.Run(k, func(t *testing.T) { +			var buf bytes.Buffer +			err := v.WriteIndent(&buf, 0) +			if err != nil { +				t.Fatalf("WriteIndent error: %v", err) +			} +			w := buf.String() +			t.Log("expected:\n" + k) +			t.Log("     got:\n" + w) +			if k != w { +				t.Fatalf("WriteIndent: output did not match expected document:\nexpected: %q\n     got: %q", k, w) +			} +		}) +	} +} + +func TestWriteParseTextFmt(t *testing.T) { +	for k, v := range writeTests { +		t.Run(k, func(t *testing.T) { +			var buf bytes.Buffer +			err := v.WriteIndent(&buf, 0) +			if err != nil { +				t.Fatalf("WriteIndent error: %v", err) +			} +			w := buf.String() + +			if w == "" { +				w = "\n" +			} +			parser := cnm.NewParser(strings.NewReader(w)) +			err = parser.Next() +			if err != nil && err != io.EOF { +				
t.Fatalf("error parsing %q: %v", w, err) +			} +			content, err := parseTextFmt(parser, cnm.TopLevel) +			if err != nil && err != io.EOF { +				t.Fatalf("error parsing %q: %v", w, err) +			} +			tf, ok := content.(TextFmtContents) +			if !ok { +				t.Fatalf("%q: expected type %T, got %T", w, v, content) +			} +			if !paragraphsEqual(v.Paragraphs, tf.Paragraphs) { +				t.Fatalf("%q:\nexpected: %#v\n     got: %#v", k, v, tf) +			} +		}) +	} +} + +func TestParseWriteTextFmt(t *testing.T) { +	for k, v := range writeTests { +		t.Run(k, func(t *testing.T) { +			s := k +			if s == "" { +				s = "\n" +			} +			parser := cnm.NewParser(strings.NewReader(s)) +			err := parser.Next() +			if err != nil && err != io.EOF { +				t.Fatalf("error parsing %q: %v", k, err) +			} + +			content, err := parseTextFmt(parser, cnm.TopLevel) +			if err != nil && err != io.EOF { +				t.Fatalf("error parsing %q: %v", k, err) +			} +			tf, ok := content.(TextFmtContents) +			if !ok { +				t.Fatalf("%q: expected type %T, got %T", k, v, content) +			} +			if !paragraphsEqual(tf.Paragraphs, v.Paragraphs) { +				t.Fatalf("%q: expected %#v, got %#v", k, v, tf) +			} + +			var buf bytes.Buffer +			err = tf.WriteIndent(&buf, 0) +			if err != nil { +				t.Fatalf("WriteIndent error: %v", err) +			} + +			w := buf.String() +			/*if w == "\n" { +				k = "" +			}*/ + +			if k != w { +				t.Fatalf("%q:\nexpected: %#v\n     got: %#v", k, k, w) +			} +		}) +	} +} diff --git a/content.go b/content.go new file mode 100644 index 0000000..971e9e3 --- /dev/null +++ b/content.go @@ -0,0 +1,610 @@ +package cnm + +import ( +	"io" +	"strings" +) + +func init() { +	RegisterTextContentParser("", parseTextPlain) +	RegisterTextContentParser("plain", parseTextPlain) +	RegisterTextContentParser("pre", parseTextPre) +} + +// Block represents an arbitrary CNM within the "content" top-level block. +type Block interface { +	// Name returns the name of the block. +	Name() string + +	// Args returns the block arguments. 
+	Args() []string +	WriteIndent(w io.Writer, n int) error +} + +// ContentBlock represents a block that holds other content blocks. +type ContentBlock struct { +	name     string +	args     []string +	children []Block +} + +// WriteIndent writes the block header and its children indented by n tabs. +func (cb *ContentBlock) WriteIndent(w io.Writer, n int) error { +	ss := []string{Escape(cb.name)} +	ss = append(ss, cb.args...) +	if err := writeIndent(w, JoinEscape(ss), n); err != nil { +		return err +	} +	for _, ch := range cb.children { +		if err := ch.WriteIndent(w, n+1); err != nil { +			return err +		} +	} +	return nil +} + +// NewContentBlock creates a new ContentBlock with a name and argument. +func NewContentBlock(name string, args ...string) *ContentBlock { +	var a []string +	for _, arg := range args { +		if arg != "" { +			a = append(a, arg) +		} +	} +	return &ContentBlock{name: name, args: a} +} + +// Name returns the block's name. +func (cb *ContentBlock) Name() string { +	return cb.name +} + +// Args returns the block arguments. +func (cb *ContentBlock) Args() []string { +	return cb.args +} + +// Children returns the block's child blocks. +func (cb *ContentBlock) Children() []Block { +	return cb.children +} + +// AppendChild adds a new child block to the end of the list of children. 
+func (cb *ContentBlock) AppendChild(block Block) { +	cb.children = append(cb.children, block) +} + +func (cb *ContentBlock) parse(p *Parser, block *TokenBlock) (err error) { +	for err == nil { +		if !p.Empty() && p.Indent() <= block.Indent() { +			break +		} + +		token := p.Block() +		if blk, ok := token.(*TokenBlock); ok { +			var b Block +			switch blk.Name { +			case "section": +				b, err = parseContentSection(p, blk) +			case "text": +				b, err = parseContentText(p, blk) +			case "raw": +				b, err = parseContentRaw(p, blk) +			case "list": +				b, err = parseContentList(p, blk) +			case "table": +				b, err = parseContentTable(p, blk) +			case "embed": +				b, err = parseContentEmbed(p, blk) +			default: +				err = parseUnknown(p, blk) +			} +			if b != nil { +				cb.AppendChild(b) +			} +		} else if err = p.Next(); err != nil { +			break +		} +	} +	return +} + +// SectionBlock represents a "section" content block. +type SectionBlock struct { +	ContentBlock +} + +// NewSectionBlock creates a new SectionBlock with a title. +func NewSectionBlock(title string) *SectionBlock { +	return &SectionBlock{*NewContentBlock("section", title)} +} + +// Title returns the section block's title. +func (b *SectionBlock) Title() string { return strings.Join(b.args, " ") } + +func parseContentSection(p *Parser, block *TokenBlock) (*SectionBlock, error) { +	sec := NewSectionBlock(strings.Join(block.Args, " ")) +	if err := p.Next(); err != nil { +		return sec, err +	} +	return sec, sec.parse(p, block) +} + +// TextBlock represents a "text" content block. +type TextBlock struct { +	// Format is the text format (first word of the block argument). +	Format string +	// Contents are the text contents. +	Contents TextContents +} + +// NewTextBlock creates a new TextBlock containing arbitrary text contents. +func NewTextBlock(format string, contents TextContents) *TextBlock { +	return &TextBlock{format, contents} +} + +// Name returns the block name "text". 
+func (t *TextBlock) Name() string { return "text" } + +// Args returns the block's arguments (format). +func (t *TextBlock) Args() []string { +	if t.Format == "" { +		return nil +	} +	return []string{t.Format} +} + +// WriteIndent writes the block header and its content indented by n tabs. +func (t *TextBlock) WriteIndent(w io.Writer, n int) error { +	s := t.Name() +	if t.Format != "" { +		s += " " + Escape(t.Format) +	} +	if err := writeIndent(w, s, n); err != nil { +		return err +	} +	if err := t.Contents.WriteIndent(w, n+1); err != nil { +		return err +	} +	return nil +} + +func parseContentText(p *Parser, block *TokenBlock) (*TextBlock, error) { +	format := "" +	if len(block.Args) >= 1 { +		format = block.Args[0] +	} +	tb := NewTextBlock(format, nil) + +	if err := p.Next(); err != nil { +		return tb, err +	} + +	var err error +	tb.Contents, err = parseTextFormat(p, block, tb.Format) + +	return tb, err +} + +func parseTextFormat(p *Parser, block *TokenBlock, format string) (TextContents, error) { +	if parser := GetTextContentParser(format); parser != nil { +		return parser(p, block) +	} +	r, err := parseContentRaw(p, block) +	return TextPreContents{r.Contents}, err +} + +// TextContents represents the textual contents of a text block. +type TextContents interface { +	WriteIndent(w io.Writer, n int) error +} + +// TextContentParser parses text content in a text block. +type TextContentParser func(p *Parser, block *TokenBlock) (TextContents, error) + +var textContentParsers = map[string]TextContentParser{} + +// GetTextContentParser retrieves a text content parser or nil if it doesn't +// exist. +func GetTextContentParser(name string) TextContentParser { +	return textContentParsers[name] +} + +// RegisterTextContentParser registers a new text content parser for a format. 
+func RegisterTextContentParser(name string, parser TextContentParser) { +	if parser == nil { +		delete(textContentParsers, name) +	} else { +		textContentParsers[name] = parser +	} +} + +// TextPlainContents represents a list of simple text paragraphs. +type TextPlainContents struct { +	// Paragraphs is a list of simple text paragraphs. +	Paragraphs []string +} + +// WriteIndent writes the plain text content indented by n tabs. +func (t TextPlainContents) WriteIndent(w io.Writer, n int) error { +	for i, p := range t.Paragraphs { +		if i != 0 { +			if err := writeIndent(w, "", 0); err != nil { +				return err +			} +		} +		if err := writeIndent(w, Escape(p), n); err != nil { +			return err +		} +	} +	return nil +} + +// NewTextPlainBlock creates a new TextBlock containing TextPlainContents. +func NewTextPlainBlock(paragraphs []string) *TextBlock { +	par := make([]string, len(paragraphs)) +	copy(par, paragraphs) +	return NewTextBlock("", TextPlainContents{par}) +} + +func parseTextPlain(p *Parser, block *TokenBlock) (TextContents, error) { +	txt := TextPlainContents{} +	paragraph := "" +	var err error +	for err == nil { +		if !p.Empty() && p.Indent() <= block.Indent() { +			break +		} + +		token := p.SimpleText() +		end := false +		if text, ok := token.(*TokenSimpleText); ok { +			if text.Text == "" { +				end = true +			} else if paragraph == "" { +				paragraph = text.Text +			} else { +				paragraph += " " + text.Text +			} +		} else if _, ok := token.(*TokenEmptyLine); ok && paragraph != "" { +			end = true +		} +		if end { +			txt.Paragraphs = append(txt.Paragraphs, paragraph) +			paragraph = "" +		} +		err = p.Next() +	} +	if paragraph != "" { +		txt.Paragraphs = append(txt.Paragraphs, paragraph) +	} +	return txt, err +} + +// TextPreContents represents preformatted contents of a text block. +type TextPreContents struct { +	// Text is the preformatted content. +	Text string +} + +// WriteIndent writes the preformatted text content indented by n tabs. 
+func (t TextPreContents) WriteIndent(w io.Writer, n int) error { +	ss := strings.Split(t.Text, "\n") +	for _, s := range ss { +		if err := writeIndent(w, EscapeNonspace(s), n); err != nil { +			return err +		} +	} +	return nil +} + +// NewTextPreBlock creates a new TextBlock containing TextPreContents. +func NewTextPreBlock(text string) *TextBlock { +	return NewTextBlock("", TextPreContents{text}) +} + +func parseTextPre(p *Parser, block *TokenBlock) (TextContents, error) { +	var lines []string +	var ls []string +	var err error +	for err == nil { +		if !p.Empty() && p.Indent() <= block.Indent() { +			break +		} + +		token := p.RawText() +		if text, ok := token.(*TokenRawText); ok { +			if len(ls) > 0 { +				lines = append(lines, ls...) +				ls = ls[:0] +			} +			lines = append(lines, Unescape(text.Text)) +		} else if _, ok := token.(*TokenEmptyLine); ok && len(lines) > 0 { +			ls = append(ls, "") +		} +		err = p.Next() +	} +	return TextPreContents{strings.Join(lines, "\n")}, err +} + +// RawBlock represents a "raw" content block. +type RawBlock struct { +	// Syntax is the syntax of the block contents (first word of block argument) +	Syntax string + +	// Contents is the raw content. +	Contents string +} + +// Name returns the block name "raw". +func (r *RawBlock) Name() string { return "raw" } + +// Args returns the block's arguments (syntax). +func (r *RawBlock) Args() []string { +	if r.Syntax == "" { +		return nil +	} +	return []string{r.Syntax} +} + +// WriteIndent writes the raw content indented by n tabs. 
+func (r *RawBlock) WriteIndent(w io.Writer, n int) error { +	s := r.Name() +	if r.Syntax != "" { +		s += " " + Escape(r.Syntax) +	} +	if err := writeIndent(w, s, n); err != nil { +		return err +	} +	if r.Contents != "" { +		ss := strings.Split(r.Contents, "\n") +		for _, s := range ss { +			if err := writeIndent(w, s, n+1); err != nil { +				return err +			} +		} +	} +	return nil +} + +func parseContentRaw(p *Parser, block *TokenBlock) (*RawBlock, error) { +	arg := "" +	if len(block.Args) > 0 { +		arg = block.Args[0] +	} +	rb := &RawBlock{arg, ""} + +	if err := p.Next(); err != nil { +		return rb, err +	} + +	var lines []string +	var ls []string +	var err error +	for err == nil { +		if !p.Empty() && p.Indent() <= block.Indent() { +			break +		} + +		token := p.RawText() +		if text, ok := token.(*TokenRawText); ok { +			if len(ls) > 0 { +				lines = append(lines, ls...) +				ls = ls[:0] +			} +			lines = append(lines, text.Text) +		} else if _, ok := token.(*TokenEmptyLine); ok && len(lines) > 0 { +			ls = append(ls, "") +		} +		err = p.Next() +	} +	rb.Contents = strings.Join(lines, "\n") + +	return rb, err +} + +// ListBlock represents a "list" content block. +type ListBlock struct { +	ContentBlock +} + +// NewListBlock creates a new ListBlock. +// +// If the ordered parameter is true, the list is created in "ordered" mode. +func NewListBlock(ordered bool) *ListBlock { +	arg := "" +	if ordered { +		arg = "ordered" +	} +	return &ListBlock{*NewContentBlock("list", arg)} +} + +// Ordered returns true if the list is in ordered mode (first word of the +// block argument is "ordered"). +func (b *ListBlock) Ordered() bool { +	return len(b.args) >= 1 && b.args[0] == "ordered" +} + +func parseContentList(p *Parser, block *TokenBlock) (*ListBlock, error) { +	list := NewListBlock(false) +	list.args = block.Args +	if err := p.Next(); err != nil { +		return list, err +	} +	return list, list.parse(p, block) +} + +// TableBlock represents a "table" content block. 
+type TableBlock struct { +	rows []Block +} + +// NewTableBlock creates a new TableBlock. +func NewTableBlock() *TableBlock { +	return &TableBlock{} +} + +// Name returns the block name "table". +func (t *TableBlock) Name() string { +	return "table" +} + +// Args returns the block's nil arguments. +func (t *TableBlock) Args() []string { +	return nil +} + +// WriteIndent writes the table header and contents indented by n tabs. +func (t *TableBlock) WriteIndent(w io.Writer, n int) error { +	if err := writeIndent(w, t.Name(), n); err != nil { +		return err +	} +	for _, row := range t.rows { +		if err := row.WriteIndent(w, n+1); err != nil { +			return err +		} +	} +	return nil +} + +// Rows returns the table's rows. +func (t *TableBlock) Rows() []Block { +	return t.rows +} + +// AppendRow adds a new row to the end of the table. +func (t *TableBlock) AppendRow(row Block) { +	t.rows = append(t.rows, row) +} + +func (t *TableBlock) parse(p *Parser, block *TokenBlock) (err error) { +	for err == nil { +		if !p.Empty() && p.Indent() <= block.Indent() { +			break +		} + +		token := p.Block() +		if blk, ok := token.(*TokenBlock); ok { +			var b Block +			switch blk.Name { +			case "row": +				b, err = parseTableRow(p, blk) +			case "header": +				b, err = parseTableHeader(p, blk) +			default: +				err = parseUnknown(p, blk) +			} +			if b != nil { +				t.AppendRow(b) +			} +		} else if err = p.Next(); err != nil { +			break +		} +	} +	return +} + +func parseContentTable(p *Parser, block *TokenBlock) (*TableBlock, error) { +	table := NewTableBlock() +	if err := p.Next(); err != nil { +		return table, err +	} +	return table, table.parse(p, block) +} + +// RowBlock represents a "row" table block. +type RowBlock struct { +	ContentBlock +} + +// NewRowBlock creates a new RowBlock. 
+func NewRowBlock() *RowBlock { +	return &RowBlock{*NewContentBlock("row", "")} +} + +func parseTableRow(p *Parser, block *TokenBlock) (*RowBlock, error) { +	row := NewRowBlock() +	if err := p.Next(); err != nil { +		return row, err +	} +	return row, row.parse(p, block) +} + +// HeaderBlock represents a "header" table block. +type HeaderBlock struct { +	ContentBlock +} + +// NewHeaderBlock creates a new HeaderBlock. +func NewHeaderBlock() *HeaderBlock { +	return &HeaderBlock{*NewContentBlock("header", "")} +} + +func parseTableHeader(p *Parser, block *TokenBlock) (*HeaderBlock, error) { +	hdr := NewHeaderBlock() +	if err := p.Next(); err != nil { +		return hdr, err +	} +	return hdr, hdr.parse(p, block) +} + +// EmbedBlock represents an "embed" content block. +type EmbedBlock struct { +	// Type is the content type (first word of block argument). +	Type string + +	// URL is the content URL (second word of the block argument). +	URL string + +	// Description is the content description (block body as simple text). +	Description string +} + +// Name returns the block name "embed". +func (e *EmbedBlock) Name() string { return "embed" } + +// Args returns the block argument (type and URL). +func (e *EmbedBlock) Args() []string { +	if e.Type != "" && e.URL != "" { +		return []string{e.Type, e.URL} +	} +	return []string{e.Type} +} + +// WriteIndent writes the embed block header and contents indented by n tabs. 
+func (e *EmbedBlock) WriteIndent(w io.Writer, n int) error { +	if e.URL == "" { +		return nil +	} + +	s := e.Name() + " " +	if e.Type == "" { +		s += "*/*" +	} else { +		s += Escape(e.Type) +	} +	s += " " + Escape(e.URL) +	if err := writeIndent(w, s, n); err != nil { +		return err +	} +	if err := writeIndent(w, Escape(e.Description), n+1); err != nil { +		return err +	} +	return nil +} + +func parseContentEmbed(p *Parser, block *TokenBlock) (*EmbedBlock, error) { +	embed := &EmbedBlock{} +	if len(block.Args) >= 1 { +		embed.Type = block.Args[0] +		if len(block.Args) >= 2 { +			embed.URL = block.Args[1] +		} +	} +	if err := p.Next(); err != nil { +		return embed, err +	} +	s, err := getSimpleText(p, block) +	embed.Description = s +	return embed, err +} diff --git a/document.go b/document.go new file mode 100644 index 0000000..b5cdbe5 --- /dev/null +++ b/document.go @@ -0,0 +1,278 @@ +// Package cnm implements CNM document parsing and composition. +package cnm // import "contnet.org/lib/cnm-go" + +import ( +	"bufio" +	"io" +	"path" +	"strings" +) + +// Document represents a CNM document. +type Document struct { +	// Title is the document title (top-level "title" block). +	Title string + +	// Links is a list of document-level hyperlinks (top-level "links" block). +	Links []Link + +	// Site is a sitemap (top-level "site" block). +	Site Site + +	// Content is the document content (top-level "content" block). +	Content *ContentBlock +} + +// ParseDocument parses a CNM document from r. 
+func ParseDocument(r io.Reader) (doc *Document, err error) { +	p := NewParser(r) +	doc = &Document{} +	err = p.Next() +	for err == nil { +		token := p.Block() +		if err = p.Next(); err != nil { +			break +		} +		if blk, ok := token.(*TokenBlock); ok { +			switch blk.Name { +			case "title": +				err = doc.parseTitle(p, blk) +			case "links": +				err = doc.parseLinks(p, blk) +			case "site": +				err = doc.Site.parse(p, blk) +			case "content": +				if doc.Content == nil { +					doc.Content = &ContentBlock{name: "content"} +				} +				err = doc.Content.parse(p, blk) +			default: +				// discard lines inside this block +				for err == nil { +					if !p.Empty() && p.Indent() <= blk.Indent() { +						break +					} +					err = p.Next() +				} +			} +		} +	} +	if err == io.EOF { +		err = nil +	} +	return +} + +func (doc *Document) Write(w io.Writer) error { +	bw := bufio.NewWriter(w) +	if doc.Title != "" { +		if err := writeIndent(bw, "title", 0); err != nil { +			return err +		} +		if err := writeIndent(bw, Escape(doc.Title), 1); err != nil { +			return err +		} +	} +	if len(doc.Links) > 0 { +		if err := writeIndent(bw, "links", 0); err != nil { +			return err +		} +		for _, link := range doc.Links { +			if err := link.WriteIndent(bw, 1); err != nil { +				return err +			} +		} +	} +	if len(doc.Site.Children) > 0 { +		if err := writeIndent(bw, "site", 0); err != nil { +			return err +		} +		for _, site := range doc.Site.Children { +			if err := site.WriteIndent(bw, 1); err != nil { +				return err +			} +		} +	} +	if doc.Content != nil { +		if err := doc.Content.WriteIndent(bw, 0); err != nil { +			return err +		} +	} +	return bw.Flush() +} + +func (doc *Document) parseTitle(p *Parser, block *TokenBlock) (err error) { +	s, err := getSimpleText(p, block) +	if doc.Title == "" { +		doc.Title = s +	} else { +		doc.Title += " " + s +	} +	return +} + +func (doc *Document) parseLinks(p *Parser, block *TokenBlock) (err error) { +	for err == nil { +		if !p.Empty() && 
p.Indent() <= block.Indent() { +			break +		} + +		token := p.Block() +		if blk, ok := token.(*TokenBlock); ok { +			if blk.Name == "" { +				err = parseUnknown(p, blk) +			} else { +				link := Link{ +					URL:  blk.Name, +					Name: strings.Join(blk.Args, " "), +				} +				doc.Links = append(doc.Links, link) +				if err = p.Next(); err != nil { +					break +				} +				doc.Links[len(doc.Links)-1].Description, err = getSimpleText(p, blk) +			} +		} +	} +	return +} + +func getSimpleText(p *Parser, block *TokenBlock) (s string, err error) { +	for err == nil { +		if !p.Empty() && p.Indent() <= block.Indent() { +			break +		} + +		token := p.SimpleText() +		if text, ok := token.(*TokenSimpleText); ok && text.Text != "" { +			if s == "" { +				s = text.Text +			} else { +				s += " " + text.Text +			} +		} + +		err = p.Next() +	} +	return +} + +// Link represents a document-level hyperlink in the "links" top-level block. +type Link struct { +	// URL is the hyperlink URL. +	URL string + +	// Name is the hyperlink text. +	Name string + +	// Description is the description of the hyperlink. +	Description string +} + +// WriteIndent writes the link URL, name and description indented by n tabs. +func (link Link) WriteIndent(w io.Writer, n int) error { +	s := Escape(link.URL) +	if link.Name != "" { +		s += " " + Escape(link.Name) +	} +	if err := writeIndent(w, s, n); err != nil { +		return err +	} +	if link.Description != "" { +		if err := writeIndent(w, Escape(link.Description), n+1); err != nil { +			return err +		} +	} +	return nil +} + +// Site represents a node in the sitemap in the "site" top-level block. +type Site struct { +	// Path is the node's path fragment. +	Path string + +	// Name is the node's name. +	Name string + +	// Children are the nodes below this node. +	Children []Site +} + +// WriteIndent writes the sitemap indented by n tabs. 
+func (site Site) WriteIndent(w io.Writer, n int) error { +	s := Escape(site.Path) +	if site.Name != "" { +		s += " " + Escape(site.Name) +	} +	if err := writeIndent(w, s, n); err != nil { +		return err +	} +	for _, ch := range site.Children { +		if err := ch.WriteIndent(w, n+1); err != nil { +			return err +		} +	} +	return nil +} + +func (site *Site) parse(p *Parser, block *TokenBlock) (err error) { +	for err == nil { +		if !p.Empty() && p.Indent() <= block.Indent() { +			break +		} + +		token := p.Block() +		if blk, ok := token.(*TokenBlock); ok { +			if blk.Name == "" { +				err = parseUnknown(p, blk) +			} else { +				s := Site{ +					Path: strings.Trim(path.Clean(blk.Name), "/"), +					Name: strings.Join(blk.Args, " "), +				} +				site.Children = append(site.Children, s) +				if err = p.Next(); err != nil { +					break +				} +				err = site.Children[len(site.Children)-1].parse(p, blk) +			} +		} else { +			err = p.Next() +		} +	} +	return +} + +func parseUnknown(p *Parser, block *TokenBlock) (err error) { +	err = p.Next() +	for err == nil { +		if !p.Empty() && p.Indent() <= block.Indent() { +			break +		} +		// discard lines inside this block +		err = p.Next() +	} +	return +} + +func writeIndent(w io.Writer, s string, depth int) error { +	const tabs = "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t" + +	if s == "" { +		_, err := w.Write([]byte{'\n'}) +		return err +	} +	if depth == 0 { +		_, err := w.Write([]byte(s + "\n")) +		return err +	} + +	var ind string +	if depth <= len(tabs) { +		ind = tabs[:depth] +	} else { +		ind = strings.Repeat("\t", depth) +	} +	_, err := w.Write([]byte(ind + s + "\n")) +	return err +} diff --git a/parse.go b/parse.go new file mode 100644 index 0000000..02f7eb2 --- /dev/null +++ b/parse.go @@ -0,0 +1,189 @@ +package cnm + +import ( +	"bufio" +	"io" +) + +// TopLevel represents the top-level block. 
+var TopLevel = &TokenBlock{ +	TokenLine: TokenLine{ +		Indentation: -1, +		RawLine:     "", +		LineNo:      0, +	}, +	Parent: nil, +	Name:   "", +	Args:   nil, +} + +// Parser parses a CNM document by lines. +type Parser struct { +	r       *bufio.Reader +	line    int +	block   *TokenBlock +	current *TokenLine +	end     bool +} + +// NewParser creates a new Parser that reads from r. +func NewParser(r io.Reader) *Parser { +	return &Parser{ +		r:       bufio.NewReader(r), +		line:    0, +		block:   TopLevel, +		current: nil, +		end:     false, +	} +} + +// Line returns the number of the last parsed line in the document, starting +// with 1 after the first line. +func (p *Parser) Line() int { +	return p.line +} + +// Next retrieves the next line. +func (p *Parser) Next() error { +	line, err := p.nextLine() +	if err != nil { +		return err +	} +	indent := 0 +	for _, c := range line { +		if c != '\t' { +			break +		} +		indent++ +	} +	if indent > p.block.Indent()+1 { +		indent = p.block.Indent() + 1 +	} +	p.current = &TokenLine{ +		Indentation: indent, +		RawLine:     line, +		LineNo:      p.line, +	} +	if p.current.Indent() <= p.block.Indent() && !p.Empty() { +		p.block = p.block.Parent +	} +	return nil +} + +// Indent returns the indentation of the current line. +// +// Returns -1 if no line has been read yet. +func (p *Parser) Indent() int { +	if p.current == nil { +		return -1 +	} +	return p.current.Indent() +} + +// Empty returns true if the current line is empty. +func (p *Parser) Empty() bool { +	if p.current == nil { +		return true +	} +	if p.current.Indent() == len(p.current.Raw()) { +		return true +	} +	return false +} + +// Block parses the current line in block mode. +// +// Returns a TokenBlock if the line was not empty, otherwise TokenEmptyLine. In +// block mode, a line is empty even if its indentation exceeds the block +// content indentation, as long as it only contains tab characters. +// +// Next() must have been called before calling Block(). 
+func (p *Parser) Block() Token { +	line := p.current.Raw()[p.current.Indent():] + +	/*indent := 0 +	for _, c := range line { +		if c != '\t' { +			break +		} +		indent++ +	} +	if len(line) == indent { +		return &TokenEmptyLine{*p.current} +	}*/ + +	ss := SplitUnescape(line) +	if len(ss) == 0 || len(ss) == 1 && ss[0] == "" { +		return &TokenEmptyLine{*p.current} +	} + +	block := TokenBlock{ +		TokenLine: *p.current, +		Parent:    p.block, +	} +	block.Name = ss[0] +	if len(ss) > 1 { +		block.Args = ss[1:] +	} + +	p.block = &block + +	return &block +} + +// RawText parses the current line as raw text. +// +// Returns a TokenRawText if the line was not empty, otherwise +// TokenEmptyLine. +// +// Next() must have been called before calling RawText(). +func (p *Parser) RawText() Token { +	if p.Empty() { +		return &TokenEmptyLine{*p.current} +	} +	return &TokenRawText{ +		TokenLine: *p.current, +		Text:      p.current.Raw()[p.current.Indent():], +	} +} + +// SimpleText parses the current line as simple text. +// +// Returns a TokenSimpleText if the line was not empty, otherwise +// TokenEmptyLine. +// +// Next() must have been called before calling SimpleText(). 
+func (p *Parser) SimpleText() Token { +	if p.Empty() { +		return &TokenEmptyLine{*p.current} +	} +	return &TokenSimpleText{ +		TokenLine: *p.current, +		Text:      ParseSimpleText(p.current.Raw()[p.current.Indent():]), +	} +} + +func (p *Parser) nextLine() (string, error) { +	l, err := p.r.ReadString('\n') +	if err == io.EOF { +		if l != "" { +			err = nil +		} else if !p.end { // XXX +			l = "\n" +			p.end = true +			err = nil +		} +	} +	rs := make([]rune, len(l)) +	ri := 0 +	for _, r := range l { +		switch r { +		case '\n', '\r', '\x00': +			continue +		} +		rs[ri] = r +		ri++ +	} +	p.line++ +	return string(rs[:ri]), err +} diff --git a/parse_test.go b/parse_test.go new file mode 100644 index 0000000..b47dc14 --- /dev/null +++ b/parse_test.go @@ -0,0 +1,701 @@ +package cnm + +import ( +	"reflect" +	"strings" +	"testing" + +	"github.com/davecgh/go-spew/spew" +) + +var parseTests = map[string]*Document{ +	"": &Document{}, + +	"foo\n\tbar\ntitle\n\ttest": &Document{ +		Title: "test", +	}, + +	"foo\n\tbar\ntitle\n\ttest\nfoo\n\tbar": &Document{ +		Title: "test", +	}, + +	"title\n\ttest": &Document{ +		Title: "test", +	}, + +	"\ntitle\n\ttest\n": &Document{ +		Title: "test", +	}, + +	"title\n\ttest\n\n": &Document{ +		Title: "test", +	}, + +	"\ntitle\n\t\t\t\t\t\n\t\tfoo bar": &Document{ +		Title: "foo bar", +	}, + +	"site\n\tfoo\nsite\n\t\tbar\n": &Document{ +		Site: Site{Children: []Site{ +			Site{Path: "foo"}, +		}}, +	}, + +	"content\n\ttext\n\t\tfoo\ncontent\n\t\tbar\n": &Document{ +		Content: &ContentBlock{ +			name: "content", +			children: []Block{ +				&TextBlock{ +					Format: "", +					Contents: TextPlainContents{ +						Paragraphs: []string{"foo"}, +					}, +				}, +			}, +		}, +	}, + +	"\n\ttitle\n\t\t\t\t\t\n\tfoo bar": &Document{}, + +	"\ttitle\n\t\tfoo\n": &Document{}, + +	"\tsite\n\t\tfoo\n": &Document{}, + +	"\tlinks\n\t\tfoo\n": &Document{}, + +	"links\n\tfoo": &Document{ +		Links: []Link{ +			Link{ +				URL: "foo", +			}, +		}, +	}, + +	
"qwe\ntitle\n\tasd": &Document{ +		Title: "asd", +	}, + +	"links\n\t qwe\n\tasd": &Document{ +		Links: []Link{ +			Link{URL: "asd"}, +		}, +	}, + +	"site\n\t qwe\n\tasd": &Document{ +		Site: Site{Children: []Site{ +			Site{Path: "asd"}, +		}}, +	}, + +	"site\n\tba\\nr": &Document{ +		Site: Site{ +			Children: []Site{ +				Site{ +					Path: "ba\nr", +				}, +			}, +		}, +	}, + +	"site\n\t\t\t\tba\\nr": &Document{}, + +	"site\n\tfoo\tbar": &Document{ +		Site: Site{ +			Children: []Site{ +				Site{ +					Path: "foo", +					Name: "bar", +				}, +			}, +		}, +	}, + +	"\t\tsite\n\t\t\t\tfoo": &Document{}, + +	"\tsite\n\tbar": &Document{}, + +	"content\n\tsection test\n": &Document{ +		Content: &ContentBlock{ +			name: "content", +			args: nil, +			children: []Block{ +				&SectionBlock{ContentBlock{ +					name: "section", +					args: []string{"test"}, +				}}, +			}, +		}, +	}, + +	"content\n\tnosuchblock\n\tsection test\n": &Document{ +		Content: &ContentBlock{ +			name: "content", +			args: nil, +			children: []Block{ +				&SectionBlock{ContentBlock{ +					name: "section", +					args: []string{"test"}, +				}}, +			}, +		}, +	}, + +	"content\n\tnosuchblock\n\tsection test\n\n\tnosuchblock2": &Document{ +		Content: &ContentBlock{ +			name: "content", +			args: nil, +			children: []Block{ +				&SectionBlock{ContentBlock{ +					name: "section", +					args: []string{"test"}, +				}}, +			}, +		}, +	}, + +	"content\n\tsection\n\t\tnosuchblock\n\t\tsection\n\t\t\ttext\n\t\t\t\ttest": &Document{ +		Content: &ContentBlock{ +			name: "content", +			args: nil, +			children: []Block{ +				&SectionBlock{ContentBlock{ +					name: "section", +					args: nil, +					children: []Block{ +						&SectionBlock{ContentBlock{ +							name: "section", +							args: nil, +							children: []Block{ +								&TextBlock{ +									Format: "", +									Contents: TextPlainContents{Paragraphs: []string{ +										"test", +									}}, +								}, +							}, +						}}, +					}, +				}}, +			}, +		}, 
+	}, + +	"content\n\ttable\n\t\tnosuchblock\n\t\trow\n\t\t\ttext\n\t\t\t\ttest": &Document{ +		Content: &ContentBlock{ +			name: "content", +			args: nil, +			children: []Block{ +				&TableBlock{rows: []Block{ +					&RowBlock{ContentBlock{ +						name: "row", +						args: nil, +						children: []Block{ +							&TextBlock{ +								Format: "", +								Contents: TextPlainContents{Paragraphs: []string{ +									"test", +								}}, +							}, +						}, +					}}, +				}}, +			}, +		}, +	}, + +	"site\n\t\ttest\n\tfoo\\ bar baz\n" + +		"links\n\tfoo\\ bar baz\n\t\t\tquux\n" + +		"content\n" + +		"links\n\t\ttest\n" + +		"content\n" + +		"\tsection\n\tsection qweasd\n" + +		"\tsection foo\\ bar  baz\n\t\ttext\n\t\t\ttest\n": &Document{ +		Site: Site{ +			Children: []Site{ +				Site{ +					Path: "foo bar", +					Name: "baz", +				}, +			}, +		}, +		Links: []Link{ +			Link{ +				URL:         "foo bar", +				Name:        "baz", +				Description: "quux", +			}, +		}, +		Content: &ContentBlock{ +			name: "content", +			args: nil, +			children: []Block{ +				&SectionBlock{ContentBlock{ +					name: "section", +					args: nil, +				}}, +				&SectionBlock{ContentBlock{ +					name: "section", +					args: []string{"qweasd"}, +				}}, +				&SectionBlock{ContentBlock{ +					name: "section", +					args: []string{"foo bar baz"}, +					children: []Block{ +						&TextBlock{ +							Contents: TextPlainContents{[]string{ +								"test", +							}}, +						}, +					}, +				}}, +			}, +		}, +	}, + +	"title\n\tfoo bar\n" + +		"links\n\tqwe asd\n\tzxc 123\n" + +		"site\n\tfoo\n\t\tbar\n\t\tbaz/quux\n\t\t\t123\n" + +		"title\n\tbaz\n" + +		"links\n\tfoo\n" + +		"site\n\ttest": &Document{ +		Title: "foo bar baz", +		Links: []Link{ +			Link{"qwe", "asd", ""}, +			Link{"zxc", "123", ""}, +			Link{"foo", "", ""}, +		}, +		Site: Site{ +			Children: []Site{ +				Site{ +					Path: "foo", +					Children: []Site{ +						Site{Path: "bar"}, +						Site{ +							Path: "baz/quux", +							Children: 
[]Site{ +								Site{Path: "123"}, +							}, +						}, +					}, +				}, +				Site{Path: "test"}, +			}, +		}, +	}, + +	` +thing stuff +	whatever +title blah + + +	Test  + +title  +	  document + +content +	section   Test   section   +		text +			This is \n just a +		text pre + +			  t  e \n s  t   +				 + + +			preformatted text + +		raw text/plain +			of various \n features +	 +		section of\ the +			table +				header +					text +						Column 1 +					text +						Column 2 +				row +					text +						CNM +					text +						document +										 +						format +				row +					section +						lorem +					list +						text +							ipsum +						list ordered +							list unordered +								text +									dolor +			 +									sit +									amet +	embed text/cnm cnp://example.com/ stuff +		thing +		whatever +`: &Document{ +		Title: "Test document", +		Content: &ContentBlock{ +			name: "content", +			args: nil, +			children: []Block{ +				&SectionBlock{ContentBlock{ +					name: "section", +					args: []string{"Test section"}, +					children: []Block{ +						&TextBlock{ +							Contents: TextPlainContents{[]string{ +								"This is \n just a", +							}}, +						}, +						&TextBlock{ +							Format: "pre", +							Contents: TextPreContents{ +								"  t  e \n s  t  \n\t\n\n\npreformatted text", +							}, +						}, +						&RawBlock{ +							Syntax:   "text/plain", +							Contents: "of various \\n features", +						}, +						&SectionBlock{ContentBlock{ +							name: "section", +							args: []string{"of the"}, +							children: []Block{ +								&TableBlock{[]Block{ +									&HeaderBlock{ContentBlock{ +										name: "header", +										args: nil, +										children: []Block{ +											&TextBlock{ +												Contents: TextPlainContents{[]string{ +													"Column 1", +												}}, +											}, +											&TextBlock{ +												Contents: TextPlainContents{[]string{ +													"Column 2", +												}}, +											}, +										}, +									}}, +									
&RowBlock{ContentBlock{ +										name: "row", +										args: nil, +										children: []Block{ +											&TextBlock{ +												Contents: TextPlainContents{[]string{ +													"CNM", +												}}, +											}, +											&TextBlock{ +												Contents: TextPlainContents{[]string{ +													"document", +													"format", +												}}, +											}, +										}, +									}}, +									&RowBlock{ContentBlock{ +										name: "row", +										args: nil, +										children: []Block{ +											&SectionBlock{ContentBlock{ +												name: "section", +												args: nil, +											}}, +											&ListBlock{ContentBlock{ +												name: "list", +												args: nil, +												children: []Block{ +													&TextBlock{ +														Contents: TextPlainContents{[]string{ +															"ipsum", +														}}, +													}, +													&ListBlock{ContentBlock{ +														name: "list", +														args: []string{"ordered"}, +														children: []Block{ +															&ListBlock{ContentBlock{ +																name: "list", +																args: []string{"unordered"}, +																children: []Block{ +																	&TextBlock{ +																		Contents: TextPlainContents{[]string{ +																			"dolor", +																			"sit amet", +																		}}, +																	}, +																}, +															}}, +														}, +													}}, +												}, +											}}, +										}, +									}}, +								}}, +							}, +						}}, +					}, +				}}, +				&EmbedBlock{ +					Type:        "text/cnm", +					URL:         "cnp://example.com/", +					Description: "thing whatever", +				}, +			}, +		}, +	}, +} + +func TestParse(t *testing.T) { +	for k, v := range parseTests { +		t.Run(k, func(t *testing.T) { +			d, err := ParseDocument(strings.NewReader(k)) +			if err != nil { +				t.Fatalf("ParseDocument(%q): error: %v", k, err) +			} +			if !documentEqual(d, v) { +				
t.Fatalf("ParseDocument(%q):\nexpected:\n%s\n     got:\n%s", k, reprDoc(v), reprDoc(d)) +			} +		}) +	} +} + +func reprDoc(d *Document) string { +	//return fmt.Sprintf("Document{Title: %q, Links: %+v, Site: %+v, Content: %s}", d.Title, d.Links, d.Site, reprContent(d.Content)) +	return spew.Sdump(d) +} + +func documentEqual(a, b *Document) bool { +	if a.Title != b.Title { +		return false +	} +	if len(a.Links) != len(b.Links) { +		return false +	} +	for i := range a.Links { +		if !linkEqual(a.Links[i], b.Links[i]) { +			return false +		} +	} +	if !siteEqual(a.Site, b.Site) { +		return false +	} +	if !contentBlockEqual(a.Content, b.Content) { +		return false +	} +	return true +} + +func linkEqual(a, b Link) bool { +	return a == b +} + +func siteEqual(a, b Site) bool { +	if a.Path != b.Path { +		return false +	} +	if a.Name != b.Name { +		return false +	} +	if len(a.Children) != len(b.Children) { +		return false +	} +	for i := range a.Children { +		if !siteEqual(a.Children[i], b.Children[i]) { +			return false +		} +	} +	return true +} + +func blockEqual(a, b Block) bool { +	switch va := a.(type) { +	case *SectionBlock: +		vb, ok := b.(*SectionBlock) +		if !ok { +			return false +		} +		return sectionBlockEqual(va, vb) + +	case *TextBlock: +		vb, ok := b.(*TextBlock) +		if !ok { +			return false +		} +		return textBlockEqual(va, vb) + +	case *RawBlock: +		vb, ok := b.(*RawBlock) +		if !ok { +			return false +		} +		return rawBlockEqual(va, vb) + +	case *ListBlock: +		vb, ok := b.(*ListBlock) +		if !ok { +			return false +		} +		return listBlockEqual(va, vb) + +	case *TableBlock: +		vb, ok := b.(*TableBlock) +		if !ok { +			return false +		} +		return tableBlockEqual(va, vb) + +	case *HeaderBlock: +		vb, ok := b.(*HeaderBlock) +		if !ok { +			return false +		} +		return headerBlockEqual(va, vb) + +	case *RowBlock: +		vb, ok := b.(*RowBlock) +		if !ok { +			return false +		} +		return rowBlockEqual(va, vb) + +	case *EmbedBlock: +		vb, ok := b.(*EmbedBlock) +		if !ok { +		
	return false +		} +		return embedBlockEqual(va, vb) + +	case *ContentBlock: +		vb, ok := b.(*ContentBlock) +		if !ok { +			return false +		} +		return contentBlockEqual(va, vb) + +	default: // shouldn't happen +		return false +	} +} + +func contentBlockEqual(a, b *ContentBlock) bool { +	if (a == nil) != (b == nil) { +		return false +	} +	if a == nil { +		return true +	} +	if a.Name() != b.Name() { +		return false +	} +	aa, ba := a.Args(), b.Args() +	if len(aa) != len(ba) { +		return false +	} +	for i := range aa { +		if aa[i] != ba[i] { +			return false +		} +	} +	ca, cb := a.Children(), b.Children() +	if len(ca) != len(cb) { +		return false +	} +	for i := range ca { +		if !blockEqual(ca[i], cb[i]) { +			return false +		} +	} +	return true +} + +func sectionBlockEqual(a, b *SectionBlock) bool { +	return contentBlockEqual(&a.ContentBlock, &b.ContentBlock) +} + +func textBlockEqual(a, b *TextBlock) bool { +	if a.Format != b.Format { +		return false +	} +	return textContentsEqual(a.Contents, b.Contents) +} + +func textContentsEqual(a, b TextContents) bool { +	switch va := a.(type) { +	case TextPlainContents: +		vb, ok := b.(TextPlainContents) +		if !ok { +			return false +		} +		return textPlainContentsEqual(va, vb) + +	case TextPreContents: +		vb, ok := b.(TextPreContents) +		if !ok { +			return false +		} +		return textPreContentsEqual(va, vb) + +	default: +		return reflect.TypeOf(a) == reflect.TypeOf(b) && reflect.DeepEqual(a, b) +	} +} + +func textPlainContentsEqual(a, b TextPlainContents) bool { +	if len(a.Paragraphs) != len(b.Paragraphs) { +		return false +	} +	for i := range a.Paragraphs { +		if a.Paragraphs[i] != b.Paragraphs[i] { +			return false +		} +	} +	return true +} + +func textPreContentsEqual(a, b TextPreContents) bool { +	return a == b +} + +func rawBlockEqual(a, b *RawBlock) bool { +	return *a == *b +} + +func listBlockEqual(a, b *ListBlock) bool { +	return contentBlockEqual(&a.ContentBlock, &b.ContentBlock) +} + +func tableBlockEqual(a, b 
*TableBlock) bool { +	ra, rb := a.Rows(), b.Rows() +	if len(ra) != len(rb) { +		return false +	} +	for i := range ra { +		if !blockEqual(ra[i], rb[i]) { +			return false +		} +	} +	return true +} + +func rowBlockEqual(a, b *RowBlock) bool { +	return contentBlockEqual(&a.ContentBlock, &b.ContentBlock) +} + +func headerBlockEqual(a, b *HeaderBlock) bool { +	return contentBlockEqual(&a.ContentBlock, &b.ContentBlock) +} + +func embedBlockEqual(a, b *EmbedBlock) bool { +	return *a == *b +} diff --git a/simpletext.go b/simpletext.go new file mode 100644 index 0000000..78e089e --- /dev/null +++ b/simpletext.go @@ -0,0 +1,196 @@ +package cnm + +import ( +	"bytes" +	"regexp" +	"strconv" +	"strings" +) + +// ParseSimpleText parses raw as simple text (collapses whitespace and resolves +// escape sequences). +func ParseSimpleText(raw string) string { +	return Unescape(CollapseWhitespace(raw)) +} + +// CollapseWhitespace collapses sequences of non-escaped whitespace in raw CNM +// simple text into single spaces. 
func CollapseWhitespace(raw string) string {
	// Tabs, line feeds, form feeds and carriage returns always act as
	// separators; replace each run of them with a single space first.
	s := strings.Join(strings.FieldsFunc(raw, func(r rune) bool {
		switch r {
		case '\t', '\n', '\f', '\r':
			return true
		}
		return false
	}), " ")

	var buf bytes.Buffer
	first := true   // nothing has been written yet (drops leading spaces)
	escape := false // previous rune was an unpaired backslash
	space := false  // unescaped spaces are pending (trailing ones are dropped)
	for _, r := range s {
		switch r {
		case '\\':
			if escape {
				buf.WriteString("\\\\")
				escape = false
			} else {
				escape = true
			}
			if space && !first {
				buf.WriteByte(' ')
			}
			space = false
			first = false
		case ' ':
			if escape {
				// An escaped space is content, not a separator.
				buf.WriteString("\\ ")
				escape = false
			} else {
				space = true
			}
		default:
			if escape {
				buf.WriteByte('\\')
			}
			if space && !first {
				buf.WriteByte(' ')
			}
			buf.WriteRune(r)
			escape = false
			space = false
			first = false
		}
	}
	if escape {
		// Keep a lone trailing backslash.
		buf.WriteByte('\\')
	}
	return buf.String()
}

var escapeRe = regexp.MustCompile(`[\t\n\f\r\\\x00]|^ | $|  `)

// Escape escapes whitespace, backslash and U+0000 within s.
//
// Single spaces between other characters are left as they are; a leading
// space, a trailing space and the second space of each pair of spaces are
// escaped so that they survive whitespace collapsing.
func Escape(s string) string {
	return escapeRe.ReplaceAllStringFunc(s, func(match string) string {
		switch match {
		case "\t":
			return `\t`
		case "\n":
			return `\n`
		case "\f":
			return `\f`
		case "\r":
			return `\r`
		case "\\":
			return `\\`
		case "\x00":
			return `\x00`
		case " ":
			return `\ `
		case "  ":
			return ` \ `
		}
		return match // this shouldn't happen
	})
}

var escapeSpaceRe = regexp.MustCompile(`[\t\n]|^ | $|  `)

// EscapeSpace works like Escape, except it only escapes spaces, tabs and line
// feeds.
func EscapeSpace(s string) string {
	return escapeSpaceRe.ReplaceAllStringFunc(s, func(match string) string {
		switch match {
		case "\t":
			return `\t`
		case "\n":
			return `\n`
		case " ":
			return `\ `
		case "  ":
			return ` \ `
		}
		return match // this shouldn't happen
	})
}

var escapeNonspaceRe = regexp.MustCompile(`[\f\r\\\x00]`)

// EscapeNonspace works like Escape, except it does not escape spaces, tabs and
// line feeds.
func EscapeNonspace(s string) string {
	return escapeNonspaceRe.ReplaceAllStringFunc(s, func(match string) string {
		switch match {
		case "\f":
			return `\f`
		case "\r":
			return `\r`
		case "\\":
			return `\\`
		case "\x00":
			return `\x00`
		}
		return match // this shouldn't happen
	})
}

var unescapeRe = regexp.MustCompile(`\\(?:[btnvfr \\]|x[0-9a-fA-F]{2}|u[0-9a-fA-F]{4}|U[0-9a-fA-F]{8})`)

// Unescape resolves escape sequences in simple text.
//
// The sequences \xNN, \uNNNN and \UNNNNNNNN are replaced with the UTF-8
// encoding of the given code point; values that are not valid code points
// yield U+FFFD. Backslashes that do not start a known escape sequence are
// left untouched.
func Unescape(s string) string {
	return unescapeRe.ReplaceAllStringFunc(s, func(match string) string {
		switch match[1] {
		case 'b':
			return "\b"
		case 't':
			return "\t"
		case 'n':
			return "\n"
		case 'v':
			return "\v"
		case 'f':
			return "\f"
		case 'r':
			return "\r"
		case ' ':
			return " "
		case '\\':
			return "\\"
		case 'x', 'u', 'U':
			// The regular expression guarantees 2, 4 or 8 hexadecimal
			// digits, so parsing into 32 bits cannot fail.
			n, _ := strconv.ParseUint(match[2:], 16, 32)
			// Convert through rune explicitly: a direct integer-to-string
			// conversion is rejected by go vet.
			return string(rune(n))
		}
		return match // this shouldn't happen
	})
}

var splitRe = regexp.MustCompile(`((?:[^\t\n\f\r\\ ]|\\.?)+|^)`)

// SplitUnescape splits the string s by whitespace, then unescapes simple text
// escape sequences.
+func SplitUnescape(s string) []string { +	ss := splitRe.FindAllString(s, -1) +	for i := range ss { +		ss[i] = Unescape(ss[i]) +	} +	return ss +} + +// JoinEscape escapes each argument using simple text escape sequences and then +// joins them with spaces. +func JoinEscape(ss []string) string { +	var l []string +	for _, s := range ss { +		if s != "" { +			l = append(l, Escape(s)) +		} +	} +	return strings.Join(l, " ") +} diff --git a/simpletext_test.go b/simpletext_test.go new file mode 100644 index 0000000..8fdf754 --- /dev/null +++ b/simpletext_test.go @@ -0,0 +1,180 @@ +package cnm + +import "testing" + +var simpleEscapes = map[string]string{ +	"":        ``, +	"ContNet": `ContNet`, +	"\t":      `\t`, +	"\n":      `\n`, +	"\f":      `\f`, +	"\r":      `\r`, +	" ":       `\ `, +	"\\":      `\\`, +	"\x00":    `\x00`, +	"       ": `\  \  \  \ `, +	"      ":  `\  \  \ \ `, +	"     ":   `\  \  \ `, +	"    ":    `\  \ \ `, +	"   ":     `\  \ `, +	"  ":      `\ \ `, +	"\b\v\\\x00\xff\u00ff\\xff": "\b\v\\\\\\x00\xff\u00ff\\\\xff", +} + +func TestEscape(t *testing.T) { +	for k, v := range simpleEscapes { +		t.Run(k, func(t *testing.T) { +			e := Escape(k) +			if e != v { +				t.Errorf("Escape(%q) -> %q, expected %q", k, e, v) +			} +		}) +	} +} + +var nonspaceEscapes = map[string]string{ +	"":        ``, +	"ContNet": `ContNet`, +	"\t":      "\t", +	"\n":      "\n", +	"\f":      `\f`, +	"\r":      `\r`, +	" ":       ` `, +	"\\":      `\\`, +	"\x00":    `\x00`, +	"       ": `       `, +	"      ":  `      `, +	"     ":   `     `, +	"    ":    `    `, +	"   ":     `   `, +	"  ":      `  `, +	"\b\v\\\x00\xff\u00ff\\xff": "\b\v\\\\\\x00\xff\u00ff\\\\xff", +} + +func TestEscapeNonspace(t *testing.T) { +	for k, v := range nonspaceEscapes { +		t.Run(k, func(t *testing.T) { +			e := EscapeNonspace(k) +			if e != v { +				t.Errorf("EscapeNonspace(%q) -> %q, expected %q", k, e, v) +			} +		}) +	} +} + +var spaceEscapes = map[string]string{ +	"":        ``, +	"ContNet": `ContNet`, +	
"\t":      `\t`, +	"\n":      `\n`, +	"\f":      "\f", +	"\r":      "\r", +	" ":       `\ `, +	"\\":      `\`, +	"\x00":    "\x00", +	"       ": `\  \  \  \ `, +	"      ":  `\  \  \ \ `, +	"     ":   `\  \  \ `, +	"    ":    `\  \ \ `, +	"   ":     `\  \ `, +	"  ":      `\ \ `, +	"\b\v\\\x00\xff\u00ff\\xff": "\b\v\\\x00\xff\u00ff\\xff", +} + +func TestEscapeSpace(t *testing.T) { +	for k, v := range spaceEscapes { +		t.Run(k, func(t *testing.T) { +			e := EscapeSpace(k) +			if e != v { +				t.Errorf("EscapeSpace(%q) -> %q, expected %q", k, e, v) +			} +		}) +	} +} + +var simpleUnescapes = map[string]string{ +	``:                                         "", +	`ContNet`:                                  "ContNet", +	`\b`:                                       "\b", +	`\t`:                                       "\t", +	`\n`:                                       "\n", +	`\v`:                                       "\v", +	`\f`:                                       "\f", +	`\r`:                                       "\r", +	`\ `:                                       " ", +	`\\`:                                       "\\", +	`\`:                                        "\\", +	`\x00`:                                     "\x00", +	`a\nb\ c\rd\be\\f`:                         "a\nb c\rd\be\\f", +	`\n\n\n`:                                   "\n\n\n", +	`\x00\xff\n\x123`:                          "\x00\u00ff\n\x123", +	"  \b\\b\t\n\v\f\r\\x00\x00\\\\xff":        "  \b\b\t\n\v\f\r\x00\x00\\xff", +	`\xAA\xAa\xaA\xaa`:                         "\u00aa\u00aa\u00aa\u00aa", +	`\x00\xfg`:                                 "\x00\\xfg", +	`\\\\\\`:                                   "\\\\\\", +	"\b5Ὂg̀9!\\n℃ᾭG":                           "\b5Ὂg̀9!\n℃ᾭG", +	"\xff\\x00\xee\xaa\xee":                    "\xff\x00\xee\xaa\xee", +	"\\x00\x10\\ \x30\x40":                     "\x00\x10\x20\x30\x40", +	"\x10\x50\x90\xe0":                         "\x10\x50\x90\xe0", +	`Hello,\ 世界`:                  
             "Hello, 世界", +	"\xed\x9f\xbf":                             "\xed\x9f\xbf", +	"\xee\x80\x80":                             "\xee\x80\x80", +	"\xef\xbf\xbd":                             "\xef\xbf\xbd", +	"\x80\x80\x80\x80":                         "\x80\x80\x80\x80", +	`\ \  \  `:                                 "     ", +	`\uffff\u0000\u0123\ufedc\ufffe`:           "\uffff\u0000\u0123\ufedc\ufffe", +	`\Uffff0000\U0003fedc\U0010ffff\U00110000`: "\ufffd\U0003fedc\U0010ffff\ufffd", +	`\x0x\u012x\U0123456x`:                     "\\x0x\\u012x\\U0123456x", +	`\U0123456`:                                "\\U0123456", +	`\u012`:                                    "\\u012", +	`\x0`:                                      "\\x0", +	`\x\u\U\a\z\0\-`:                           "\\x\\u\\U\\a\\z\\0\\-", +} + +func TestUnescape(t *testing.T) { +	for k, v := range simpleUnescapes { +		t.Run(k, func(t *testing.T) { +			u := Unescape(k) +			if u != v { +				t.Errorf("Unescape(%q) -> %q, expected %q", k, u, v) +			} +		}) +	} +} + +var simpleTexts = map[string]string{ +	"foo":                         "foo", +	"\n":                          "", +	"\n\r \t\v\f":                 "\v", +	" ":                           "", +	`\ `:                          " ", +	` \ `:                         " ", +	`\  `:                         " ", +	`\  \ `:                       "   ", +	` \`:                          "\\", +	`\`:                           "\\", +	`    \   `:                    " ", +	`     `:                       "", +	`\  \  `:                      "   ", +	` \  `:                        " ", +	"  qwe  asd  ":                "qwe asd", +	"\\  qwe\nasd\n\nzxc\\n123\n": "  qwe asd zxc\n123", +	`\   \   \ \ \ `:              "       ", +	` \ \   \   `:                 "    ", +	` \\ `:                        "\\", +	`\ \\ `:                       " \\", +	` \\\ `:                       "\\ ", +	` \ \\\  `:                    " \\ ", +	`\  \\ \ `:                    "  \\  ", 
+} + +func TestParseSimpleText(t *testing.T) { +	for k, v := range simpleTexts { +		t.Run(k, func(t *testing.T) { +			u := ParseSimpleText(k) +			if u != v { +				t.Errorf("ParseSimpleText(%q) -> %q, expected %q", k, u, v) +			} +		}) +	} +} diff --git a/token.go b/token.go new file mode 100644 index 0000000..a6b08b9 --- /dev/null +++ b/token.go @@ -0,0 +1,58 @@ +package cnm + +// Token represents a parsed line in a CNM document. +type Token interface { +	Indent() int +	Raw() string +	Line() int +} + +// TokenLine represents an arbitrary CNM line. +type TokenLine struct { +	Indentation int +	RawLine     string +	LineNo      int +} + +// Indent returns the indentation of the parsed line. +func (t *TokenLine) Indent() int { return t.Indentation } + +// Raw returns the original unparsed line. +func (t *TokenLine) Raw() string { return t.RawLine } + +// Line returns the line number in the document, starting from 1. +func (t *TokenLine) Line() int { return t.LineNo } + +// TokenEmptyLine represents an empty line. +// +// A line is empty as long as it contains up to as many tab characters as the +// line's indentation and nothing else. +type TokenEmptyLine struct { +	TokenLine +} + +// TokenBlock represents a block header line. +type TokenBlock struct { +	TokenLine +	// Parent is the parent block +	Parent *TokenBlock +	// Name is the block name. +	Name string +	// Args are the block arguments, split by whitespace and then parsed as +	// simple text. +	Args []string +} + +// TokenSimpleText represents a line of simple text. +type TokenSimpleText struct { +	TokenLine +	// Text is the line contents parsed as simple text. +	Text string +} + +// TokenRawText represents a non-empty line with unparsed contents. +type TokenRawText struct { +	TokenLine +	// Text is the raw contents of the line with the indentation removed. 
+	Text string +} diff --git a/write_test.go b/write_test.go new file mode 100644 index 0000000..fc13459 --- /dev/null +++ b/write_test.go @@ -0,0 +1,218 @@ +package cnm + +import ( +	"bytes" +	"testing" +) + +var writeTests = map[string]*Document{ +	"": &Document{}, + +	"title\n\tfoo bar\n": &Document{ +		Title: "foo bar", +	}, + +	"title\n\tfoo bar baz\n" + +		"links\n\tqwe asd\n\tzxc 123\n\tfoo\n" + +		"site\n\tfoo\n\t\tbar\n\t\tbaz/quux\n\t\t\t123\n\ttest\n": &Document{ +		Title: "foo bar baz", +		Links: []Link{ +			Link{"qwe", "asd", ""}, +			Link{"zxc", "123", ""}, +			Link{"foo", "", ""}, +		}, +		Site: Site{ +			Children: []Site{ +				Site{ +					Path: "foo", +					Children: []Site{ +						Site{Path: "bar"}, +						Site{ +							Path: "baz/quux", +							Children: []Site{ +								Site{Path: "123"}, +							}, +						}, +					}, +				}, +				Site{Path: "test"}, +			}, +		}, +	}, + +	`title +	Test document +content +	section Test section +		text +			This is \n just a +		text pre +			  t  e  +			 s  t   +				 +		raw text/plain +			of various \n features +		section of the +			table +				header +					text +						Column 1 +					text +						Column 2 +				row +					text +						CNM +					text +						document + +						format +				row +					section +					list +						text +							ipsum +						list ordered +							list unordered +								text +									dolor + +									sit amet +	embed text/cnm cnp://example.com/ +		thing whatever +`: &Document{ +		Title: "Test document", +		Content: &ContentBlock{ +			name: "content", +			args: nil, +			children: []Block{ +				&SectionBlock{ContentBlock{ +					name: "section", +					args: []string{"Test", "section"}, +					children: []Block{ +						&TextBlock{ +							Contents: TextPlainContents{[]string{ +								"This is \n just a", +							}}, +						}, +						&TextBlock{ +							Format: "pre", +							Contents: TextPreContents{ +								"  t  e \n s  t  \n\t", +							}, +						}, +						&RawBlock{ +							Syntax:   "text/plain", 
+							Contents: "of various \\n features", +						}, +						&SectionBlock{ContentBlock{ +							name: "section", +							args: []string{"of the"}, +							children: []Block{ +								&TableBlock{[]Block{ +									&HeaderBlock{ContentBlock{ +										name: "header", +										args: []string{}, +										children: []Block{ +											&TextBlock{ +												Contents: TextPlainContents{[]string{ +													"Column 1", +												}}, +											}, +											&TextBlock{ +												Contents: TextPlainContents{[]string{ +													"Column 2", +												}}, +											}, +										}, +									}}, +									&RowBlock{ContentBlock{ +										name: "row", +										args: []string{}, +										children: []Block{ +											&TextBlock{ +												Contents: TextPlainContents{[]string{ +													"CNM", +												}}, +											}, +											&TextBlock{ +												Contents: TextPlainContents{[]string{ +													"document", +													"format", +												}}, +											}, +										}, +									}}, +									&RowBlock{ContentBlock{ +										name: "row", +										args: []string{""}, +										children: []Block{ +											&SectionBlock{ContentBlock{ +												name: "section", +												args: []string{"", "", ""}, +											}}, +											&ListBlock{ContentBlock{ +												name: "list", +												args: nil, +												children: []Block{ +													&TextBlock{ +														Contents: TextPlainContents{[]string{ +															"ipsum", +														}}, +													}, +													&ListBlock{ContentBlock{ +														name: "list", +														args: []string{"ordered"}, +														children: []Block{ +															&ListBlock{ContentBlock{ +																name: "list", +																args: []string{"unordered"}, +																children: []Block{ +																	&TextBlock{ +																		Contents: TextPlainContents{[]string{ +																			"dolor", +																			"sit 
amet", +																		}}, +																	}, +																}, +															}}, +														}, +													}}, +												}, +											}}, +										}, +									}}, +								}}, +							}, +						}}, +					}, +				}}, +				&EmbedBlock{ +					Type:        "text/cnm", +					URL:         "cnp://example.com/", +					Description: "thing whatever", +				}, +			}, +		}, +	}, +} + +func TestWrite(t *testing.T) { +	for k, v := range writeTests { +		t.Run(k, func(t *testing.T) { +			var buf bytes.Buffer +			err := v.Write(&buf) +			if err != nil { +				t.Fatalf("Write error: %v", err) +			} +			w := buf.String() +			t.Log("====================") +			t.Log("expected:\n" + k) +			t.Log("--------------------") +			t.Log("     got:\n" + w) +			t.Log("====================") +			if k != w { +				t.Fatal("Write: output did not match expected document") +			} +		}) +	} +} |