diff options
author | Victor Häggqvist <[email protected]> | 2022-09-14 23:54:17 +0200 |
---|---|---|
committer | Victor Häggqvist <[email protected]> | 2022-09-14 23:54:23 +0200 |
commit | 1bbce1aa65e4af84a144b137ea2037004eb2881d (patch) | |
tree | fa02ab592746556a884a8e75d480fb3804d26805 /vendor/gopkg.in/yaml.v2/scannerc.go | |
parent | 4e0fb944f18411381366e620ef5193f886957b6f (diff) |
go bump
Diffstat (limited to '')
-rw-r--r-- | vendor/gopkg.in/yaml.v3/scannerc.go (renamed from vendor/gopkg.in/yaml.v2/scannerc.go) | 363 |
1 files changed, 345 insertions, 18 deletions
diff --git a/vendor/gopkg.in/yaml.v2/scannerc.go b/vendor/gopkg.in/yaml.v3/scannerc.go index 0b9bb60..ca00701 100644 --- a/vendor/gopkg.in/yaml.v2/scannerc.go +++ b/vendor/gopkg.in/yaml.v3/scannerc.go @@ -1,3 +1,25 @@ +// +// Copyright (c) 2011-2019 Canonical Ltd +// Copyright (c) 2006-2010 Kirill Simonov +// +// Permission is hereby granted, free of charge, to any person obtaining a copy of +// this software and associated documentation files (the "Software"), to deal in +// the Software without restriction, including without limitation the rights to +// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +// of the Software, and to permit persons to whom the Software is furnished to do +// so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + package yaml import ( @@ -489,6 +511,9 @@ func cache(parser *yaml_parser_t, length int) bool { // Advance the buffer pointer. func skip(parser *yaml_parser_t) { + if !is_blank(parser.buffer, parser.buffer_pos) { + parser.newlines = 0 + } parser.mark.index++ parser.mark.column++ parser.unread-- @@ -502,17 +527,22 @@ func skip_line(parser *yaml_parser_t) { parser.mark.line++ parser.unread -= 2 parser.buffer_pos += 2 + parser.newlines++ } else if is_break(parser.buffer, parser.buffer_pos) { parser.mark.index++ parser.mark.column = 0 parser.mark.line++ parser.unread-- parser.buffer_pos += width(parser.buffer[parser.buffer_pos]) + parser.newlines++ } } // Copy a character to a string buffer and advance pointers. func read(parser *yaml_parser_t, s []byte) []byte { + if !is_blank(parser.buffer, parser.buffer_pos) { + parser.newlines = 0 + } w := width(parser.buffer[parser.buffer_pos]) if w == 0 { panic("invalid character sequence") @@ -564,6 +594,7 @@ func read_line(parser *yaml_parser_t, s []byte) []byte { parser.mark.column = 0 parser.mark.line++ parser.unread-- + parser.newlines++ return s } @@ -626,9 +657,13 @@ func trace(args ...interface{}) func() { func yaml_parser_fetch_more_tokens(parser *yaml_parser_t) bool { // While we need more tokens to fetch, do it. for { - if parser.tokens_head != len(parser.tokens) { - // If queue is non-empty, check if any potential simple key may - // occupy the head position. + // [Go] The comment parsing logic requires a lookahead of two tokens + // so that foot comments may be parsed in time of associating them + // with the tokens that are parsed before them, and also for line + // comments to be transformed into head comments in some edge cases. + if parser.tokens_head < len(parser.tokens)-2 { + // If a potential simple key is at the head position, we need to fetch + // the next token to disambiguate it. head_tok_idx, ok := parser.simple_keys_by_tok[parser.tokens_parsed] if !ok { break @@ -649,7 +684,7 @@ func yaml_parser_fetch_more_tokens(parser *yaml_parser_t) bool { } // The dispatcher for token fetchers. -func yaml_parser_fetch_next_token(parser *yaml_parser_t) bool { +func yaml_parser_fetch_next_token(parser *yaml_parser_t) (ok bool) { // Ensure that the buffer is initialized. if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { return false @@ -660,13 +695,19 @@ func yaml_parser_fetch_next_token(parser *yaml_parser_t) bool { return yaml_parser_fetch_stream_start(parser) } + scan_mark := parser.mark + // Eat whitespaces and comments until we reach the next token. if !yaml_parser_scan_to_next_token(parser) { return false } + // [Go] While unrolling indents, transform the head comments of prior + // indentation levels observed after scan_start into foot comments at + // the respective indexes. + // Check the indentation level against the current column. - if !yaml_parser_unroll_indent(parser, parser.mark.column) { + if !yaml_parser_unroll_indent(parser, parser.mark.column, scan_mark) { return false } @@ -699,6 +740,26 @@ func yaml_parser_fetch_next_token(parser *yaml_parser_t) bool { return yaml_parser_fetch_document_indicator(parser, yaml_DOCUMENT_END_TOKEN) } + comment_mark := parser.mark + if len(parser.tokens) > 0 && (parser.flow_level == 0 && buf[pos] == ':' || parser.flow_level > 0 && buf[pos] == ',') { + // Associate any following comments with the prior token. + comment_mark = parser.tokens[len(parser.tokens)-1].start_mark + } + defer func() { + if !ok { + return + } + if len(parser.tokens) > 0 && parser.tokens[len(parser.tokens)-1].typ == yaml_BLOCK_ENTRY_TOKEN { + // Sequence indicators alone have no line comments. It becomes + // a head comment for whatever follows. + return + } + if !yaml_parser_scan_line_comment(parser, comment_mark) { + ok = false + return + } + }() + // Is it the flow sequence start indicator? if buf[pos] == '[' { return yaml_parser_fetch_flow_collection_start(parser, yaml_FLOW_SEQUENCE_START_TOKEN) @@ -792,7 +853,7 @@ func yaml_parser_fetch_next_token(parser *yaml_parser_t) bool { // if it is followed by a non-space character. // // The last rule is more restrictive than the specification requires. - // [Go] Make this logic more reasonable. + // [Go] TODO Make this logic more reasonable. //switch parser.buffer[parser.buffer_pos] { //case '-', '?', ':', ',', '?', '-', ',', ':', ']', '[', '}', '{', '&', '#', '!', '*', '>', '|', '"', '\'', '@', '%', '-', '`': //} @@ -965,19 +1026,49 @@ func yaml_parser_roll_indent(parser *yaml_parser_t, column, number int, typ yaml // Pop indentation levels from the indents stack until the current level // becomes less or equal to the column. For each indentation level, append // the BLOCK-END token. -func yaml_parser_unroll_indent(parser *yaml_parser_t, column int) bool { +func yaml_parser_unroll_indent(parser *yaml_parser_t, column int, scan_mark yaml_mark_t) bool { // In the flow context, do nothing. if parser.flow_level > 0 { return true } + block_mark := scan_mark + block_mark.index-- + // Loop through the indentation levels in the stack. for parser.indent > column { + + // [Go] Reposition the end token before potential following + // foot comments of parent blocks. For that, search + // backwards for recent comments that were at the same + // indent as the block that is ending now. + stop_index := block_mark.index + for i := len(parser.comments) - 1; i >= 0; i-- { + comment := &parser.comments[i] + + if comment.end_mark.index < stop_index { + // Don't go back beyond the start of the comment/whitespace scan, unless column < 0. + // If requested indent column is < 0, then the document is over and everything else + // is a foot anyway. + break + } + if comment.start_mark.column == parser.indent+1 { + // This is a good match. But maybe there's a former comment + // at that same indent level, so keep searching. + block_mark = comment.start_mark + } + + // While the end of the former comment matches with + // the start of the following one, we know there's + // nothing in between and scanning is still safe. + stop_index = comment.scan_mark.index + } + // Create a token and append it to the queue. token := yaml_token_t{ typ: yaml_BLOCK_END_TOKEN, - start_mark: parser.mark, - end_mark: parser.mark, + start_mark: block_mark, + end_mark: block_mark, } yaml_insert_token(parser, -1, &token) @@ -1026,7 +1117,7 @@ func yaml_parser_fetch_stream_end(parser *yaml_parser_t) bool { } // Reset the indentation level. - if !yaml_parser_unroll_indent(parser, -1) { + if !yaml_parser_unroll_indent(parser, -1, parser.mark) { return false } @@ -1050,7 +1141,7 @@ func yaml_parser_fetch_stream_end(parser *yaml_parser_t) bool { // Produce a VERSION-DIRECTIVE or TAG-DIRECTIVE token. func yaml_parser_fetch_directive(parser *yaml_parser_t) bool { // Reset the indentation level. - if !yaml_parser_unroll_indent(parser, -1) { + if !yaml_parser_unroll_indent(parser, -1, parser.mark) { return false } @@ -1074,7 +1165,7 @@ func yaml_parser_fetch_directive(parser *yaml_parser_t) bool { // Produce the DOCUMENT-START or DOCUMENT-END token. func yaml_parser_fetch_document_indicator(parser *yaml_parser_t, typ yaml_token_type_t) bool { // Reset the indentation level. - if !yaml_parser_unroll_indent(parser, -1) { + if !yaml_parser_unroll_indent(parser, -1, parser.mark) { return false } @@ -1107,6 +1198,7 @@ func yaml_parser_fetch_document_indicator(parser *yaml_parser_t, typ yaml_token_ // Produce the FLOW-SEQUENCE-START or FLOW-MAPPING-START token. func yaml_parser_fetch_flow_collection_start(parser *yaml_parser_t, typ yaml_token_type_t) bool { + // The indicators '[' and '{' may start a simple key. if !yaml_parser_save_simple_key(parser) { return false @@ -1442,6 +1534,8 @@ func yaml_parser_fetch_plain_scalar(parser *yaml_parser_t) bool { // Eat whitespaces and comments until the next token is found. func yaml_parser_scan_to_next_token(parser *yaml_parser_t) bool { + scan_mark := parser.mark + // Until the next token is not found. for { // Allow the BOM mark to start a line. @@ -1468,13 +1562,33 @@ func yaml_parser_scan_to_next_token(parser *yaml_parser_t) bool { } } + // Check if we just had a line comment under a sequence entry that + // looks more like a header to the following content. Similar to this: + // + // - # The comment + // - Some data + // + // If so, transform the line comment to a head comment and reposition. + if len(parser.comments) > 0 && len(parser.tokens) > 1 { + tokenA := parser.tokens[len(parser.tokens)-2] + tokenB := parser.tokens[len(parser.tokens)-1] + comment := &parser.comments[len(parser.comments)-1] + if tokenA.typ == yaml_BLOCK_SEQUENCE_START_TOKEN && tokenB.typ == yaml_BLOCK_ENTRY_TOKEN && len(comment.line) > 0 && !is_break(parser.buffer, parser.buffer_pos) { + // If it was in the prior line, reposition so it becomes a + // header of the follow up token. Otherwise, keep it in place + // so it becomes a header of the former. + comment.head = comment.line + comment.line = nil + if comment.start_mark.line == parser.mark.line-1 { + comment.token_mark = parser.mark + } + } + } + // Eat a comment until a line break. if parser.buffer[parser.buffer_pos] == '#' { - for !is_breakz(parser.buffer, parser.buffer_pos) { - skip(parser) - if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { - return false - } + if !yaml_parser_scan_comments(parser, scan_mark) { + return false } } @@ -1572,6 +1686,10 @@ func yaml_parser_scan_directive(parser *yaml_parser_t, token *yaml_token_t) bool } if parser.buffer[parser.buffer_pos] == '#' { + // [Go] Discard this inline comment for the time being. + //if !yaml_parser_scan_line_comment(parser, start_mark) { + // return false + //} for !is_breakz(parser.buffer, parser.buffer_pos) { skip(parser) if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { @@ -1987,7 +2105,7 @@ func yaml_parser_scan_tag_uri(parser *yaml_parser_t, directive bool, head []byte // '0'-'9', 'A'-'Z', 'a'-'z', '_', '-', ';', '/', '?', ':', '@', '&', // '=', '+', '$', ',', '.', '!', '~', '*', '\'', '(', ')', '[', ']', // '%'. - // [Go] Convert this into more reasonable logic. + // [Go] TODO Convert this into more reasonable logic. for is_alpha(parser.buffer, parser.buffer_pos) || parser.buffer[parser.buffer_pos] == ';' || parser.buffer[parser.buffer_pos] == '/' || parser.buffer[parser.buffer_pos] == '?' || parser.buffer[parser.buffer_pos] == ':' || parser.buffer[parser.buffer_pos] == '@' || @@ -2142,6 +2260,9 @@ func yaml_parser_scan_block_scalar(parser *yaml_parser_t, token *yaml_token_t, l } } if parser.buffer[parser.buffer_pos] == '#' { + if !yaml_parser_scan_line_comment(parser, start_mark) { + return false + } for !is_breakz(parser.buffer, parser.buffer_pos) { skip(parser) if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { @@ -2709,3 +2830,209 @@ func yaml_parser_scan_plain_scalar(parser *yaml_parser_t, token *yaml_token_t) b } return true } + +func yaml_parser_scan_line_comment(parser *yaml_parser_t, token_mark yaml_mark_t) bool { + if parser.newlines > 0 { + return true + } + + var start_mark yaml_mark_t + var text []byte + + for peek := 0; peek < 512; peek++ { + if parser.unread < peek+1 && !yaml_parser_update_buffer(parser, peek+1) { + break + } + if is_blank(parser.buffer, parser.buffer_pos+peek) { + continue + } + if parser.buffer[parser.buffer_pos+peek] == '#' { + seen := parser.mark.index+peek + for { + if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { + return false + } + if is_breakz(parser.buffer, parser.buffer_pos) { + if parser.mark.index >= seen { + break + } + if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) { + return false + } + skip_line(parser) + } else if parser.mark.index >= seen { + if len(text) == 0 { + start_mark = parser.mark + } + text = read(parser, text) + } else { + skip(parser) + } + } + } + break + } + if len(text) > 0 { + parser.comments = append(parser.comments, yaml_comment_t{ + token_mark: token_mark, + start_mark: start_mark, + line: text, + }) + } + return true +} + +func yaml_parser_scan_comments(parser *yaml_parser_t, scan_mark yaml_mark_t) bool { + token := parser.tokens[len(parser.tokens)-1] + + if token.typ == yaml_FLOW_ENTRY_TOKEN && len(parser.tokens) > 1 { + token = parser.tokens[len(parser.tokens)-2] + } + + var token_mark = token.start_mark + var start_mark yaml_mark_t + var next_indent = parser.indent + if next_indent < 0 { + next_indent = 0 + } + + var recent_empty = false + var first_empty = parser.newlines <= 1 + + var line = parser.mark.line + var column = parser.mark.column + + var text []byte + + // The foot line is the place where a comment must start to + // still be considered as a foot of the prior content. + // If there's some content in the currently parsed line, then + // the foot is the line below it. + var foot_line = -1 + if scan_mark.line > 0 { + foot_line = parser.mark.line-parser.newlines+1 + if parser.newlines == 0 && parser.mark.column > 1 { + foot_line++ + } + } + + var peek = 0 + for ; peek < 512; peek++ { + if parser.unread < peek+1 && !yaml_parser_update_buffer(parser, peek+1) { + break + } + column++ + if is_blank(parser.buffer, parser.buffer_pos+peek) { + continue + } + c := parser.buffer[parser.buffer_pos+peek] + var close_flow = parser.flow_level > 0 && (c == ']' || c == '}') + if close_flow || is_breakz(parser.buffer, parser.buffer_pos+peek) { + // Got line break or terminator. + if close_flow || !recent_empty { + if close_flow || first_empty && (start_mark.line == foot_line && token.typ != yaml_VALUE_TOKEN || start_mark.column-1 < next_indent) { + // This is the first empty line and there were no empty lines before, + // so this initial part of the comment is a foot of the prior token + // instead of being a head for the following one. Split it up. + // Alternatively, this might also be the last comment inside a flow + // scope, so it must be a footer. + if len(text) > 0 { + if start_mark.column-1 < next_indent { + // If dedented it's unrelated to the prior token. + token_mark = start_mark + } + parser.comments = append(parser.comments, yaml_comment_t{ + scan_mark: scan_mark, + token_mark: token_mark, + start_mark: start_mark, + end_mark: yaml_mark_t{parser.mark.index + peek, line, column}, + foot: text, + }) + scan_mark = yaml_mark_t{parser.mark.index + peek, line, column} + token_mark = scan_mark + text = nil + } + } else { + if len(text) > 0 && parser.buffer[parser.buffer_pos+peek] != 0 { + text = append(text, '\n') + } + } + } + if !is_break(parser.buffer, parser.buffer_pos+peek) { + break + } + first_empty = false + recent_empty = true + column = 0 + line++ + continue + } + + if len(text) > 0 && (close_flow || column-1 < next_indent && column != start_mark.column) { + // The comment at the different indentation is a foot of the + // preceding data rather than a head of the upcoming one. + parser.comments = append(parser.comments, yaml_comment_t{ + scan_mark: scan_mark, + token_mark: token_mark, + start_mark: start_mark, + end_mark: yaml_mark_t{parser.mark.index + peek, line, column}, + foot: text, + }) + scan_mark = yaml_mark_t{parser.mark.index + peek, line, column} + token_mark = scan_mark + text = nil + } + + if parser.buffer[parser.buffer_pos+peek] != '#' { + break + } + + if len(text) == 0 { + start_mark = yaml_mark_t{parser.mark.index + peek, line, column} + } else { + text = append(text, '\n') + } + + recent_empty = false + + // Consume until after the consumed comment line. + seen := parser.mark.index+peek + for { + if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { + return false + } + if is_breakz(parser.buffer, parser.buffer_pos) { + if parser.mark.index >= seen { + break + } + if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) { + return false + } + skip_line(parser) + } else if parser.mark.index >= seen { + text = read(parser, text) + } else { + skip(parser) + } + } + + peek = 0 + column = 0 + line = parser.mark.line + next_indent = parser.indent + if next_indent < 0 { + next_indent = 0 + } + } + + if len(text) > 0 { + parser.comments = append(parser.comments, yaml_comment_t{ + scan_mark: scan_mark, + token_mark: start_mark, + start_mark: start_mark, + end_mark: yaml_mark_t{parser.mark.index + peek - 1, line, column}, + head: text, + }) + } + return true +} |