From 26248678aafc2f8e277d4bdafc116f2b349b02c5 Mon Sep 17 00:00:00 2001 From: clsr Date: Fri, 18 Aug 2017 13:45:49 +0200 Subject: Initial commit --- parse.go | 189 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 189 insertions(+) create mode 100644 parse.go (limited to 'parse.go') diff --git a/parse.go b/parse.go new file mode 100644 index 0000000..02f7eb2 --- /dev/null +++ b/parse.go @@ -0,0 +1,189 @@ +package cnm + +import ( + "bufio" + "io" +) + +// TopLevel represents the top-level block. +var TopLevel = &TokenBlock{ + TokenLine: TokenLine{ + Indentation: -1, + RawLine: "", + LineNo: 0, + }, + Parent: nil, + Name: "", + Args: nil, +} + +// Parser parses a CNM document by lines. +type Parser struct { + r *bufio.Reader + line int + block *TokenBlock + current *TokenLine + end bool +} + +// NewParser creates a new Parser that reads from r. +func NewParser(r io.Reader) *Parser { + return &Parser{ + r: bufio.NewReader(r), + line: 0, + block: TopLevel, + current: nil, + end: false, + } +} + +// Line returns the number of the last parsed line in the document, starting +// with 1 after the first line. +func (p *Parser) Line() int { + return p.line +} + +// Next retrieves the next line. +func (p *Parser) Next() error { + line, err := p.nextLine() + if err != nil { + return err + } + indent := 0 + for _, c := range line { + if c != '\t' { + break + } + indent++ + } + if indent > p.block.Indent()+1 { + indent = p.block.Indent() + 1 + } + p.current = &TokenLine{ + Indentation: indent, + RawLine: line, + LineNo: p.line, + } + if p.current.Indent() <= p.block.Indent() && !p.Empty() { + p.block = p.block.Parent + } + return nil +} + +// Indent returns the indentation of the current line. +// +// Returns -1 if no line has been read yet. +func (p *Parser) Indent() int { + if p.current == nil { + return -1 + } + return p.current.Indent() +} + +// Empty returns true if the current line is empty. +func (p *Parser) Empty() bool { + if p.current == nil { + return true + } + if p.current.Indent() == len(p.current.Raw()) { + return true + } + return false +} + +// Block parses the current line in block mode. +// +// Returns a TokenBlock if the line was not empty, otherwise TokenEmptyLine. In +// block mode, a line is empty even if its indentation exceeds the block +// content indentation, as long as it only contains tab characters. +// +// Next() must have been called before calling Block(). +func (p *Parser) Block() Token { + line := p.current.Raw()[p.current.Indent():] + + /*indent := 0 + for _, c := range line { + if c != '\t' { + break + } + indent++ + } + if len(line) == indent { + return &TokenEmptyLine{*p.current} + }*/ + + ss := SplitUnescape(line) + if len(ss) == 0 || len(ss) == 1 && ss[0] == "" { + return &TokenEmptyLine{*p.current} + } + + block := TokenBlock{ + TokenLine: *p.current, + Parent: p.block, + } + block.Name = ss[0] + if len(ss) > 1 { + block.Args = ss[1:] + } + + p.block = &block + + return &block +} + +// RawText parses the current line as raw text. +// +// Returns a TokenRawText if the line was not empty, otherwise +// TokenEmptyLine. +// +// Next() must have been called before calling RawText(). +func (p *Parser) RawText() Token { + if p.Empty() { + return &TokenEmptyLine{*p.current} + } + return &TokenRawText{ + TokenLine: *p.current, + Text: p.current.Raw()[p.current.Indent():], + } +} + +// SimpleText parses the current line as simple text. +// +// Returns a TokenSimpleText if the line was not empty, otherwise +// TokenEmptyLine. +// +// Next() must have been called before calling SimpleText(). +func (p *Parser) SimpleText() Token { + if p.Empty() { + return &TokenEmptyLine{*p.current} + } + return &TokenSimpleText{ + TokenLine: *p.current, + Text: ParseSimpleText(p.current.Raw()[p.current.Indent():]), + } +} + +func (p *Parser) nextLine() (string, error) { + l, err := p.r.ReadString('\n') + if err == io.EOF { + if l != "" { + err = nil + } else if !p.end { // XXX + l = "\n" + p.end = true + err = nil + } + } + rs := make([]rune, len(l)) + ri := 0 + for _, r := range l { + switch r { + case '\n', '\r', '\x00': + continue + } + rs[ri] = r + ri++ + } + p.line++ + return string(rs[:ri]), err +} -- cgit