From 26248678aafc2f8e277d4bdafc116f2b349b02c5 Mon Sep 17 00:00:00 2001
From: clsr <clsr@clsr.net>
Date: Fri, 18 Aug 2017 13:45:49 +0200
Subject: Initial commit

---
 parse.go | 189 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 189 insertions(+)
 create mode 100644 parse.go

(limited to 'parse.go')

diff --git a/parse.go b/parse.go
new file mode 100644
index 0000000..02f7eb2
--- /dev/null
+++ b/parse.go
@@ -0,0 +1,189 @@
+package cnm
+
+import (
+	"bufio"
+	"io"
+)
+
+// TopLevel is the implicit root block of every document. It has no parent,
+// name or arguments, and its indentation is -1, one level less than that of
+// an unindented line.
+var TopLevel = &TokenBlock{
+	TokenLine: TokenLine{Indentation: -1},
+}
+
+// Parser parses a CNM document by lines.
+//
+// A Parser is created with NewParser. Each call to Next reads one line, which
+// can then be interpreted with Block, RawText or SimpleText.
+type Parser struct {
+	r       *bufio.Reader // buffered source of the document
+	line    int           // incremented by nextLine on every read attempt
+	block   *TokenBlock   // innermost block that is currently open
+	current *TokenLine    // line stored by the last successful Next; nil before that
+	end     bool          // set once the extra empty line at EOF has been produced
+}
+
+// NewParser returns a Parser that reads the document from r.
+//
+// The parser starts inside the TopLevel block with no current line; call Next
+// to read the first one.
+func NewParser(r io.Reader) *Parser {
+	p := new(Parser)
+	p.r = bufio.NewReader(r)
+	p.block = TopLevel
+	return p
+}
+
+// Line returns the number of the last parsed line in the document. It is 0
+// before the first call to Next and 1 once the first line has been read.
+func (p *Parser) Line() int {
+	return p.line
+}
+
+// Next reads the next line of the document and makes it the current line.
+//
+// The line's indentation is its number of leading tab characters, capped at
+// one level deeper than the innermost open block; any tabs beyond that cap
+// remain part of the line's content. If the line is not empty and is not
+// indented deeper than the open block, every block the line has dedented out
+// of is closed, so that afterwards the innermost open block is the one that
+// contains the line.
+//
+// Returns the error reported while reading (io.EOF once the document is
+// exhausted); in that case the current line and the open block are unchanged.
+func (p *Parser) Next() error {
+	line, err := p.nextLine()
+	if err != nil {
+		return err
+	}
+
+	// Count leading tabs, but never past the deepest indentation the open
+	// block allows. A tab is a single byte, so indexing bytes is safe here.
+	limit := p.block.Indent() + 1
+	indent := 0
+	for indent < limit && indent < len(line) && line[indent] == '\t' {
+		indent++
+	}
+
+	p.current = &TokenLine{
+		Indentation: indent,
+		RawLine:     line,
+		LineNo:      p.line,
+	}
+
+	// Close every block the line has left, not just the innermost one:
+	// a line may dedent by several levels at once, and stopping after a single
+	// pop would leave p.block on a block the line is not part of. TopLevel has
+	// indentation -1, so the loop stops there at the latest; the Parent check
+	// only guards against a malformed block chain.
+	for !p.Empty() && p.current.Indent() <= p.block.Indent() && p.block.Parent != nil {
+		p.block = p.block.Parent
+	}
+	return nil
+}
+
+// Indent reports the indentation of the current line, or -1 when no line has
+// been read yet.
+func (p *Parser) Indent() int {
+	if cur := p.current; cur != nil {
+		return cur.Indent()
+	}
+	return -1
+}
+
+// Empty reports whether the current line is empty, that is, whether it
+// consists of nothing but its indentation. It also reports true when no line
+// has been read yet.
+func (p *Parser) Empty() bool {
+	cur := p.current
+	return cur == nil || len(cur.Raw()) == cur.Indent()
+}
+
+// Block interprets the current line as a block header.
+//
+// The content after the indentation is split with SplitUnescape; the first
+// field becomes the block name and the remaining fields its arguments. The
+// new block is opened as a child of the innermost open block and returned as
+// a TokenBlock. If the split yields nothing, or a single empty field, a
+// TokenEmptyLine is returned instead and no block is opened. In block mode, a
+// line is empty even if its indentation exceeds the block content
+// indentation, as long as it only contains tab characters.
+//
+// Next() must have been called before calling Block().
+func (p *Parser) Block() Token {
+	cur := p.current
+	fields := SplitUnescape(cur.Raw()[cur.Indent():])
+
+	switch {
+	case len(fields) == 0, len(fields) == 1 && fields[0] == "":
+		return &TokenEmptyLine{*cur}
+	}
+
+	blk := &TokenBlock{
+		TokenLine: *cur,
+		Parent:    p.block,
+		Name:      fields[0],
+	}
+	if len(fields) > 1 {
+		blk.Args = fields[1:]
+	}
+
+	// The new block becomes the innermost open block for following lines.
+	p.block = blk
+	return blk
+}
+
+// RawText interprets the current line as raw text.
+//
+// For a non-empty line it returns a TokenRawText whose Text is the line with
+// its indentation removed; for an empty line it returns a TokenEmptyLine.
+//
+// Next() must have been called before calling RawText().
+func (p *Parser) RawText() Token {
+	cur := p.current
+	if !p.Empty() {
+		return &TokenRawText{
+			TokenLine: *cur,
+			Text:      cur.Raw()[cur.Indent():],
+		}
+	}
+	return &TokenEmptyLine{*cur}
+}
+
+// SimpleText interprets the current line as simple text.
+//
+// For a non-empty line it returns a TokenSimpleText whose Text is the result
+// of passing the line, with its indentation removed, through ParseSimpleText;
+// for an empty line it returns a TokenEmptyLine.
+//
+// Next() must have been called before calling SimpleText().
+func (p *Parser) SimpleText() Token {
+	cur := p.current
+	if !p.Empty() {
+		content := cur.Raw()[cur.Indent():]
+		return &TokenSimpleText{
+			TokenLine: *cur,
+			Text:      ParseSimpleText(content),
+		}
+	}
+	return &TokenEmptyLine{*cur}
+}
+
+// nextLine reads one line from the underlying reader and returns it with all
+// '\n', '\r' and NUL characters removed.
+//
+// A final line without a trailing newline is returned without error. The
+// first time the reader reports io.EOF with no data, one extra empty line is
+// returned instead of the error; after that io.EOF is passed through. The
+// line counter is incremented on every call, whether or not it succeeds.
+func (p *Parser) nextLine() (string, error) {
+	raw, err := p.r.ReadString('\n')
+	if err == io.EOF {
+		switch {
+		case raw != "":
+			// Last line lacks a newline; treat it as a normal line.
+			err = nil
+		case !p.end: // XXX
+			// Emit a single extra empty line before reporting EOF.
+			raw = "\n"
+			p.end = true
+			err = nil
+		}
+	}
+	p.line++
+
+	// len(raw) is a byte count, so it is an upper bound on the rune count.
+	kept := make([]rune, 0, len(raw))
+	for _, c := range raw {
+		if c == '\n' || c == '\r' || c == '\x00' {
+			continue
+		}
+		kept = append(kept, c)
+	}
+	return string(kept), err
+}
-- 
cgit