package main

import (
	"errors"
	"io"
	"log"
	"os"
	"path/filepath"
	"strings"
	"unicode"

	"golang.org/x/text/transform"

	"github.com/yasushi-saito/rbtree"
)

type InputIndex []IndexEntry

func NewInputIndex(option *InputOptions, style *InputStyle) *InputIndex {
	inset := rbtree.NewTree(CompareIndexEntry)

	if option.stdin {
		readIdxFile(inset, os.Stdin, option, style)
	} else {
		for _, idxname := range option.input {
			// 文件不存在且无后缀时，加上默认后缀 .idx 再试
			if _, err := os.Stat(idxname); os.IsNotExist(err) && filepath.Ext(idxname) == "" {
				idxname = idxname + ".idx"
			}
			idxfile, err := os.Open(idxname)
			if err != nil {
				log.Fatalln(err.Error())
			}
			readIdxFile(inset, idxfile, option, style)
			idxfile.Close()
		}
	}

	var in InputIndex
	for iter := inset.Min(); !iter.Limit(); iter = iter.Next() {
		pentry := iter.Item().(*IndexEntry)
		in = append(in, *pentry)
	}
	return &in
}

func readIdxFile(inset *rbtree.Tree, idxfile *os.File, option *InputOptions, style *InputStyle) {
	log.Printf("读取输入文件 %s ……\n", idxfile.Name())
	accepted, rejected := 0, 0

	idxreader := NewNumberdReader(transform.NewReader(idxfile, option.decoder))
	for {
		entry, err := ScanIndexEntry(idxreader, option, style)
		if err == io.EOF {
			break
		} else if err == ScanSyntaxError {
			rejected++
			log.Printf("%s:%d: %s\n", idxfile.Name(), idxreader.Line(), err.Error())
			// 跳过一行
			if err := idxreader.SkipLine(); err == io.EOF {
				break
			} else if err != nil {
				log.Fatalln(err.Error())
			}
		} else if err != nil {
			log.Fatalln(err.Error())
		} else {
			accepted++
			if old := inset.Get(entry); old != nil {
				oldentry := old.(*IndexEntry)
				oldentry.pagelist = append(oldentry.pagelist, entry.pagelist...)
			} else {
				// entry 不在集合 inset 中时，插入 entry 本身和所有祖先节点，祖先不含页码
				for len(entry.level) > 0 {
					inset.Insert(entry)
					parent := &IndexEntry{
						level:    entry.level[:len(entry.level)-1],
						pagelist: nil,
					}
					if inset.Get(parent) != nil {
						break
					} else {
						entry = parent
					}
				}
			}
		}
	}
	log.Printf("接受 %d 项，拒绝 %d 项。\n", accepted, rejected)
}

// 跳过空白符和行注释
func skipspaces(reader *NumberdReader, style *InputStyle) error {
	for {
		r, _, err := reader.ReadRune()
		if err != nil {
			return err
		} else if r == style.comment { // 注释以 style.comment 开头，直至行末
			reader.SkipLine()
		} else if !unicode.IsSpace(r) {
			reader.UnreadRune()
			break
		}
	}
	return nil
}

func ScanIndexEntry(reader *NumberdReader, option *InputOptions, style *InputStyle) (*IndexEntry, error) {
	var entry IndexEntry
	page := new(Page)
	// 跳过空白符
	if err := skipspaces(reader, style); err != nil {
		return nil, err
	}
	// 跳过 keyword
	for _, r := range style.keyword {
		new_r, _, err := reader.ReadRune()
		if err != nil {
			return nil, err
		}
		if new_r != r {
			return nil, ScanSyntaxError
		}
	}
	// 跳过空白符
	if err := skipspaces(reader, style); err != nil {
		return nil, err
	}
	// 自动机状态
	const (
		SCAN_OPEN = iota
		SCAN_KEY
		SCAN_VALUE
		SCAN_COMMAND
		SCAN_PAGE
		SCAN_PAGERANGE
	)
	// 从 arg_open 开始扫描到 arg_close，处理索引项
	state := SCAN_OPEN
	quoted := false
	escaped := false
	arg_depth := 0
	var token []rune
	var entry_input []rune
	page.rangetype = PAGE_NORMAL
L_scan_kv:
	for {
		r, _, err := reader.ReadRune()
		entry_input = append(entry_input, r)
		if err != nil {
			return nil, err
		}
		//debug.Printf("字符 %2c, 状态 %d, quoted %5v, escaped %5v, arg_depth %d\n", r, state, quoted, escaped, arg_depth) //// DEBUG only
		switch state {
		case SCAN_OPEN:
			if !quoted && r == style.arg_open {
				state = SCAN_KEY
			} else {
				return nil, ScanSyntaxError
			}
		case SCAN_KEY:
			push_keyval := func(next int) {
				str := string(token)
				if option.compress {
					str = strings.TrimSpace(str)
				}
				entry.level = append(entry.level, IndexEntryLevel{key: str, text: str})
				token = nil
				state = next
			}
			if quoted {
				token = append(token, r)
				quoted = false
				break
			} else if r == style.arg_open && !escaped {
				token = append(token, r)
				arg_depth++
			} else if r == style.arg_close && !escaped {
				if arg_depth == 0 {
					push_keyval(0)
					break L_scan_kv
				} else {
					token = append(token, r)
					arg_depth--
				}
			} else if r == style.actual {
				push_keyval(SCAN_VALUE)
			} else if r == style.encap {
				push_keyval(SCAN_PAGERANGE)
			} else if r == style.level {
				push_keyval(SCAN_KEY)
			} else if r == style.quote && !escaped {
				quoted = true
			} else {
				token = append(token, r)
			}
			if r == style.escape {
				escaped = true
			} else {
				escaped = false
			}
		case SCAN_VALUE:
			set_value := func(next int) {
				str := string(token)
				entry.level[len(entry.level)-1].text = str
				token = nil
				state = next
			}
			// 暂不对 actual 特殊处理
			if quoted {
				token = append(token, r)
				quoted = false
				break
			} else if r == style.arg_open && !escaped {
				token = append(token, r)
				arg_depth++
			} else if r == style.arg_close && !escaped {
				if arg_depth == 0 {
					set_value(0)
					break L_scan_kv
				} else {
					token = append(token, r)
					arg_depth--
				}
			} else if r == style.encap {
				set_value(SCAN_PAGERANGE)
			} else if r == style.level {
				set_value(SCAN_KEY)
			} else if r == style.quote && !escaped {
				quoted = true
			} else {
				token = append(token, r)
			}
			if r == style.escape {
				escaped = true
			} else {
				escaped = false
			}
		case SCAN_PAGERANGE:
			if quoted {
				token = append(token, r)
				quoted = false
				break
			} else if r == style.arg_open || r == style.arg_close || r == style.actual || r == style.encap || r == style.level {
				// 注意 encap 符号后不能直接加 arg_open、arg_close 等符号
				return nil, ScanSyntaxError
			} else if r == style.range_open {
				page.rangetype = PAGE_OPEN
			} else if r == style.range_close {
				page.rangetype = PAGE_CLOSE
			} else if r == style.quote { // 之前是 encap，无须考虑被 escape 转义
				quoted = true
			} else {
				token = append(token, r)
			}
			state = SCAN_COMMAND
			if r == style.escape {
				escaped = true
			} else {
				escaped = false
			}
		case SCAN_COMMAND:
			// 不对 encap、actual、level 特殊处理
			if quoted {
				token = append(token, r)
				quoted = false
				break
			} else if r == style.arg_open && !escaped {
				token = append(token, r)
				arg_depth++
			} else if r == style.arg_close && !escaped {
				if arg_depth == 0 {
					page.encap = string(token)
					break L_scan_kv
				} else {
					token = append(token, r)
					arg_depth--
				}
			} else if r == style.quote && !escaped {
				quoted = true
			} else {
				token = append(token, r)
			}
			if r == style.escape {
				escaped = true
			} else {
				escaped = false
			}
		default:
			panic("扫描状态错误")
		}
	}
	entry.input = string(entry_input)
	// 跳过空白符
	if err := skipspaces(reader, style); err != nil {
		return nil, err
	}
	// 从 arg_open 开始扫描到 arg_close，处理页码
	state = SCAN_OPEN
	token = nil
L_scan_page:
	for {
		r, _, err := reader.ReadRune()
		if err != nil {
			return nil, err
		}
		// debug.Printf("字符 %c, 状态 %d\n", r, state) //// DEBUG only
		switch state {
		case SCAN_OPEN:
			if r == style.arg_open {
				state = SCAN_PAGE
			} else {
				return nil, ScanSyntaxError
			}
		case SCAN_PAGE:
			if r == style.arg_close {
				page.numbers, err = scanPage(token, style.page_compositor)
				if err != nil {
					return nil, err
				}
				break L_scan_page
			} else if r == style.arg_open {
				return nil, ScanSyntaxError
			} else {
				token = append(token, r)
			}
		default:
			panic("扫描状态错误")
		}
		// 未实现对 style.page_compositor 的处理
	}
	page.compositor = style.page_compositor
	entry.pagelist = append(entry.pagelist, page)
	// debug.Println(entry) //// DEBUG only
	return &entry, nil
}

var ScanSyntaxError = errors.New("索引项语法错误")

type IndexEntry struct {
	input    string
	level    []IndexEntryLevel
	pagelist []*Page
}

// 实现 rbtree.CompareFunc
func CompareIndexEntry(a, b rbtree.Item) int {
	x := a.(*IndexEntry)
	y := b.(*IndexEntry)
	for i := range x.level {
		if i >= len(y.level) {
			return 1
		}
		if x.level[i].key < y.level[i].key {
			return -1
		} else if x.level[i].key > y.level[i].key {
			return 1
		}
		if x.level[i].text < y.level[i].text {
			return -1
		} else if x.level[i].text > y.level[i].text {
			return 1
		}
	}
	if len(x.level) < len(y.level) {
		return -1
	}
	return 0
}

// 一条索引条目中的一级
type IndexEntryLevel struct {
	key  string
	text string
}

type RangeType int

const (
	PAGE_UNKNOWN RangeType = iota
	PAGE_OPEN
	PAGE_NORMAL
	PAGE_CLOSE
)

func (rt RangeType) String() string {
	switch rt {
	case PAGE_UNKNOWN:
		return "?"
	case PAGE_OPEN:
		return "("
	case PAGE_NORMAL:
		return "."
	case PAGE_CLOSE:
		return ")"
	default:
		panic("区间格式错误")
	}
}
