summaryrefslogtreecommitdiffstats
path: root/cnmfmt/cnmfmt.go
diff options
context:
space:
mode:
authorclsr <clsr@clsr.net>2017-08-18 13:45:49 +0200
committerclsr <clsr@clsr.net>2017-08-18 13:45:49 +0200
commit26248678aafc2f8e277d4bdafc116f2b349b02c5 (patch)
tree15f82488edb8c05aae756443284731875f36737c /cnmfmt/cnmfmt.go
downloadcnm-go-26248678aafc2f8e277d4bdafc116f2b349b02c5.tar.gz
cnm-go-26248678aafc2f8e277d4bdafc116f2b349b02c5.zip
Initial commitv0.1.0
Diffstat (limited to 'cnmfmt/cnmfmt.go')
-rw-r--r--cnmfmt/cnmfmt.go525
1 files changed, 525 insertions, 0 deletions
diff --git a/cnmfmt/cnmfmt.go b/cnmfmt/cnmfmt.go
new file mode 100644
index 0000000..cb8dc64
--- /dev/null
+++ b/cnmfmt/cnmfmt.go
@@ -0,0 +1,525 @@
+// Package cnmfmt provides parsing and composition for CNMfmt formatting.
+package cnmfmt // import "contnet.org/lib/cnm-go/cnmfmt"
+
+import (
+ "bytes"
+ "io"
+ "strings"
+
+ "contnet.org/lib/cnm-go"
+)
+
+func init() {
+ cnm.RegisterTextContentParser("fmt", parseTextFmt)
+}
+
+// Text represents a paragraph of CNMfmt text.
+type Text struct {
+ // Spans are spans of formatted text.
+ Spans []Span
+}
+
+// ParseParagraph parses a single CNMfmt text paragraph s.
+func ParseParagraph(s string) Text {
+ s = cnm.CollapseWhitespace(s)
+
+ t := Text{}
+ var buf bytes.Buffer
+ format := Format{}
+ last := rune(-1)
+ url := false
+
+ for _, r := range s {
+ if url && format.Link == "" { // need URL for link
+ if handleURL(r, &last, &format, &buf) {
+ continue
+ }
+ }
+
+ switch r {
+ case '*', '/', '_', '`', '@':
+ handleTag(r, &last, &t, &format, &buf, &url)
+
+ case '\\':
+ if last == '\\' {
+ buf.WriteString("\\\\")
+ last = -1
+ } else {
+ if last >= 0 {
+ buf.WriteRune(last)
+ }
+ last = '\\'
+ }
+
+ default:
+ if last >= 0 {
+ buf.WriteRune(last)
+ }
+ buf.WriteRune(r)
+ last = -1
+ }
+ }
+
+ if url && format.Link == "" {
+ if last >= 0 {
+ buf.WriteRune(last)
+ }
+ format.Link = Unescape(buf.String())
+ buf.Reset()
+ } else if last >= 0 {
+ buf.WriteRune(last)
+ }
+ last = -1
+ handleTag(-1, &last, &t, &format, &buf, &url)
+
+ t.trimUnescape()
+
+ return t
+}
+
+func (t *Text) trimUnescape() {
+ var spans []Span
+
+ for _, span := range t.Spans {
+ if span.Text != "" || span.Format.Link != "" {
+ spans = append(spans, span)
+ }
+ }
+ t.Spans, spans = spans, nil
+
+ for i := len(t.Spans) - 1; i >= 0; i-- {
+ span := t.Spans[i]
+ if span.Text != "" || span.Format.Link != "" {
+ spans = append(spans, span)
+ }
+ }
+ for i := 0; i < len(spans)/2; i++ {
+ spans[i], spans[len(spans)-1-i] = spans[len(spans)-1-i], spans[i]
+ }
+ t.Spans = spans
+
+ for i := range t.Spans {
+ t.Spans[i].Text = Unescape(t.Spans[i].Text)
+ }
+}
+
+func (t *Text) appendSpan(format Format, txt string) {
+ if txt != "" || format.Link != "" {
+ t.Spans = append(t.Spans, Span{format, txt})
+ }
+}
+
+func handleURL(r rune, last *rune, format *Format, buf *bytes.Buffer) bool {
+ if r == '@' && *last == '@' { // end without text
+ format.Link = Unescape(buf.String())
+ buf.Reset()
+ return false
+ } else if *last == '\\' {
+ buf.WriteByte('\\')
+ buf.WriteRune(r)
+ *last = -1
+ } else if r == '\\' || r == '@' {
+ *last = r
+ } else if r != ' ' { // url
+ buf.WriteRune(r)
+ } else if buf.Len() > 0 { // space, then text
+ format.Link = Unescape(buf.String())
+ buf.Reset()
+ } // else: prefix space
+ return true
+}
+
+func handleTag(r rune, last *rune, txt *Text, format *Format, buf *bytes.Buffer, url *bool) {
+ if *last == '\\' {
+ buf.WriteRune(r)
+ *last = -1
+ } else if *last == r {
+ txt.appendSpan(*format, buf.String())
+ buf.Reset()
+ switch r {
+ case '*':
+ format.Bold = !format.Bold
+ case '/':
+ format.Italic = !format.Italic
+ case '_':
+ format.Underline = !format.Underline
+ case '`':
+ format.Monospace = !format.Monospace
+ case '@':
+ format.Link = ""
+ *url = !*url
+ }
+ *last = -1
+ } else {
+ switch *last {
+ case '*', '/', '_', '`', '@':
+ buf.WriteRune(*last)
+ }
+ *last = r
+ }
+}
+
+// WriteIndent writes the formatted text indented by n tabs.
+func (t Text) WriteIndent(w io.Writer, n int) error {
+ var state [5]byte // bold, italic, underline, monospace, link
+ si := 0
+ format := Format{}
+ spans := EscapeSpans(t.Spans)
+ var line []string
+ for _, span := range spans {
+ order := tagOrder(state[:si], format, span.Format)
+ for _, f := range order {
+ switch f {
+ case '*':
+ format.Bold = !format.Bold
+ line = append(line, "**")
+ case '/':
+ format.Italic = !format.Italic
+ line = append(line, "//")
+ case '_':
+ format.Underline = !format.Underline
+ line = append(line, "__")
+ case '`':
+ format.Monospace = !format.Monospace
+ line = append(line, "``")
+ case '@':
+ if format.Link != "" {
+ line = append(line, "@@")
+ }
+ if span.Format.Link != "" {
+ pad := ""
+ if span.Text != "" {
+ pad = " "
+ }
+ line = append(line, "@@", cnm.Escape(span.Format.Link), pad)
+ }
+ }
+ }
+ line = append(line, span.Text)
+ si = cleanupTags(state[:], order, span.Format)
+ format = span.Format
+ }
+ return writeIndent(w, strings.Join(line, ""), n)
+}
+
+func tagOrder(state []byte, old, new Format) []byte {
+ ldiff := ""
+ if old.Link != new.Link {
+ ldiff = "1"
+ }
+ diff := Format{
+ Bold: old.Bold != new.Bold,
+ Italic: old.Italic != new.Italic,
+ Underline: old.Underline != new.Underline,
+ Monospace: old.Monospace != new.Monospace,
+ Link: ldiff,
+ }
+
+ var order [5]byte
+ oi := 0
+ for i := len(state) - 1; i >= 0; i-- {
+ switch state[i] {
+ case '*':
+ if diff.Bold {
+ order[oi] = '*'
+ oi++
+ diff.Bold = false
+ }
+ case '/':
+ if diff.Italic {
+ order[oi] = '/'
+ oi++
+ diff.Italic = false
+ }
+ case '_':
+ if diff.Underline {
+ order[oi] = '_'
+ oi++
+ diff.Underline = false
+ }
+ case '`':
+ if diff.Monospace {
+ order[oi] = '`'
+ oi++
+ diff.Monospace = false
+ }
+ case '@':
+ if diff.Link != "" {
+ order[oi] = '@'
+ oi++
+ diff.Link = ""
+ }
+ }
+ }
+
+ if diff.Bold {
+ order[oi] = '*'
+ oi++
+ }
+ if diff.Italic {
+ order[oi] = '/'
+ oi++
+ }
+ if diff.Underline {
+ order[oi] = '_'
+ oi++
+ }
+ if diff.Monospace {
+ order[oi] = '`'
+ oi++
+ }
+ if diff.Link != "" {
+ order[oi] = '@'
+ oi++
+ }
+
+ return order[:oi]
+}
+
+func cleanupTags(state []byte, order []byte, format Format) int {
+ var newState [10]byte
+ copy(newState[:5], state)
+ copy(newState[5:], order)
+ for i := range newState {
+ switch newState[i] {
+ case '*':
+ if !format.Bold {
+ newState[i] = 0
+ }
+ case '/':
+ if !format.Italic {
+ newState[i] = 0
+ }
+ case '_':
+ if !format.Underline {
+ newState[i] = 0
+ }
+ case '`':
+ if !format.Monospace {
+ newState[i] = 0
+ }
+ case '@':
+ if format.Link == "" {
+ newState[i] = 0
+ }
+ }
+ }
+ si := 0
+ for _, f := range newState {
+ if f > 0 {
+ state[si] = f
+ si++
+ }
+ }
+ return si
+}
+
+// Span represents a span of text with a format.
+type Span struct {
+ // Format is the format of the text.
+ Format Format
+
+ // Text is the text content of the span.
+ Text string
+}
+
// Format describes which CNMfmt formatting is in effect.
type Format struct {
	Bold      bool   // Bold marks the text as bold.
	Italic    bool   // Italic marks the text as italic.
	Underline bool   // Underline marks the text as underlined.
	Monospace bool   // Monospace marks the text as monospaced.
	Link      string // Link is the hyperlink URL; empty means no link.
}
+
+// Escape escapes CNMfmt and CNM text special characters.
+func Escape(s string) string {
+ return EscapeFmt(cnm.Escape(s))
+}
+
+// EscapeSpans escapes CNMfmt and CNM text within spans.
+//
+// This function will not needlessly escape spaces at the start or end of a
+// span if the sibling span contains nonspaces.
+func EscapeSpans(spans []Span) []Span {
+ // XXX: this is an ugly solution
+ esc := make([]Span, len(spans))
+ for i := range spans {
+ start := false
+ end := false
+ span := spans[i]
+ if i+1 < len(spans) {
+ s := spans[i+1].Text
+ if len(s) > 0 && s[0] != ' ' {
+ span.Text = span.Text + "x"
+ end = true
+ }
+ }
+ if i > 0 {
+ s := spans[i-1].Text
+ if len(s) > 0 && s[len(s)-1] != ' ' {
+ span.Text = "x" + span.Text
+ start = true
+ }
+ }
+ span.Text = Escape(span.Text)
+ if start {
+ span.Text = span.Text[1:]
+ }
+ if end {
+ span.Text = span.Text[:len(span.Text)-1]
+ }
+ esc[i] = span
+ }
+ return esc
+}
+
// escapeReplacer puts a backslash in front of every CNMfmt format toggle
// character.
var escapeReplacer = strings.NewReplacer(
	"*", "\\*",
	"/", "\\/",
	"_", "\\_",
	"`", "\\`",
	"@", "\\@",
)

// EscapeFmt escapes only the CNMfmt format toggle characters in s
// ('*', '/', '_', '`' and '@'); everything else is returned unchanged.
func EscapeFmt(s string) string {
	escaped := escapeReplacer.Replace(s)
	return escaped
}
+
+// Unescape resolves CNM text and CNMfmt escape sequences in s.
+func Unescape(s string) string {
+ return cnm.Unescape(UnescapeFmt(s))
+}
+
// unescapeReplacer removes the backslash in front of CNMfmt format toggle
// characters. An escaped backslash is mapped to itself: it is consumed as a
// unit and stays escaped, so its second backslash is never taken as the
// start of another escape sequence.
var unescapeReplacer = strings.NewReplacer(
	"\\\\", "\\\\",
	"\\*", "*",
	"\\/", "/",
	"\\_", "_",
	"\\`", "`",
	"\\@", "@",
)

// UnescapeFmt resolves only the CNMfmt escape sequences in s; all other
// escape sequences, including escaped backslashes, are left as they are.
func UnescapeFmt(s string) string {
	plain := unescapeReplacer.Replace(s)
	return plain
}
+
+// TextFmtContents represents CNM `text fmt` contents.
+type TextFmtContents struct {
+ Paragraphs []Text
+}
+
+// NewTextFmtBlock creates a new `text fmt` block containing provided CNMfmt
+// paragraphs.
+func NewTextFmtBlock(paragraphs []Text) *cnm.TextBlock {
+ return cnm.NewTextBlock("fmt", TextFmtContents{paragraphs})
+}
+
+// WriteIndent writes the formatted text contents indented by n tabs.
+func (tf TextFmtContents) WriteIndent(w io.Writer, n int) error {
+ for i, p := range tf.Paragraphs {
+ if i != 0 {
+ if err := writeIndent(w, "", 0); err != nil {
+ return err
+ }
+ }
+ if err := p.WriteIndent(w, n); err != nil {
+ return err
+ }
+ }
+ return nil
+}
+
+// Parse parses paragraphs of CNMfmt text.
+func Parse(paragraphs string) []Text {
+ var txt []Text
+ var paragraph []string
+
+ for _, line := range strings.Split(paragraphs, "\n") {
+ end := false
+ if line != "" {
+ if strings.Trim(line, "\n\r\t\f ") == "" {
+ end = true
+ } else {
+ paragraph = append(paragraph, line)
+ }
+ } else if len(paragraph) > 0 {
+ end = true
+ }
+ if end {
+ txt = append(txt, ParseParagraph(strings.Join(paragraph, " ")))
+ paragraph = nil
+ }
+ }
+ if len(paragraph) > 0 {
+ txt = append(txt, ParseParagraph(strings.Join(paragraph, " ")))
+ }
+
+ return txt
+}
+
// writeIndent writes s to w as one line, preceded by depth tabs and followed
// by a newline.
//
// An empty s is written as a bare newline without indentation. A depth of
// zero or less means no indentation. The whole line is handed to w in a
// single call; the error of that write is returned.
func writeIndent(w io.Writer, s string, depth int) error {
	// Indentation up to this depth is sliced from a constant instead of
	// being built for every line.
	const tabs = "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t"

	var line string
	switch {
	case s == "":
		line = "\n"
	case depth <= 0:
		line = s + "\n"
	case depth <= len(tabs):
		line = tabs[:depth] + s + "\n"
	default:
		line = strings.Repeat("\t", depth) + s + "\n"
	}

	_, err := io.WriteString(w, line)
	return err
}
+
+func parseTextFmt(p *cnm.Parser, block *cnm.TokenBlock) (cnm.TextContents, error) {
+ txt := TextFmtContents{}
+ var paragraph []string
+ var err error
+ for err == nil {
+ if !p.Empty() && p.Indent() <= block.Indent() {
+ break
+ }
+
+ token := p.RawText()
+ end := false
+ if text, ok := token.(*cnm.TokenRawText); ok {
+ if strings.Trim(text.Text, "\n\r\t\f ") == "" {
+ end = true
+ } else {
+ paragraph = append(paragraph, text.Text)
+ }
+ } else if _, ok := token.(*cnm.TokenEmptyLine); ok && len(paragraph) > 0 {
+ end = true
+ }
+ if end {
+ txt.Paragraphs = append(txt.Paragraphs, ParseParagraph(strings.Join(paragraph, " ")))
+ paragraph = nil
+ }
+ err = p.Next()
+ }
+ if len(paragraph) > 0 {
+ txt.Paragraphs = append(txt.Paragraphs, ParseParagraph(strings.Join(paragraph, " ")))
+ }
+ return txt, err
+}