Skip to content

Commit d12ea39

Browse files
committed
Improve comment handling in the pretty-printer (pp).
1 parent 30f8348 commit d12ea39

File tree

4 files changed

+333
-205
lines changed

4 files changed

+333
-205
lines changed

src/comp/front/lexer.rs

+162-57
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ state type reader = state obj {
2222
fn get_mark_chpos() -> uint;
2323
fn get_interner() -> @interner::interner[str];
2424
fn get_chpos() -> uint;
25+
fn get_col() -> uint;
2526
fn get_filemap() -> codemap::filemap;
2627
fn err(str m);
2728
};
@@ -33,6 +34,7 @@ fn new_reader(session sess, io::reader rdr,
3334
state obj reader(session sess,
3435
str file,
3536
uint len,
37+
mutable uint col,
3638
mutable uint pos,
3739
mutable char ch,
3840
mutable uint mark_chpos,
@@ -68,9 +70,11 @@ fn new_reader(session sess, io::reader rdr,
6870

6971
fn bump() {
7072
if (pos < len) {
73+
col += 1u;
7174
chpos += 1u;
7275
if (ch == '\n') {
7376
codemap::next_line(fm, chpos);
77+
col = 0u;
7478
}
7579
auto next = str::char_range_at(file, pos);
7680
pos = next._1;
@@ -82,6 +86,10 @@ fn new_reader(session sess, io::reader rdr,
8286

8387
fn get_interner() -> @interner::interner[str] { ret itr; }
8488

89+
fn get_col() -> uint {
90+
ret col;
91+
}
92+
8593
fn get_filemap() -> codemap::filemap {
8694
ret fm;
8795
}
@@ -92,7 +100,8 @@ fn new_reader(session sess, io::reader rdr,
92100
}
93101
auto file = str::unsafe_from_bytes(rdr.read_whole_stream());
94102
let vec[str] strs = [];
95-
auto rd = reader(sess, file, str::byte_len(file), 0u, -1 as char,
103+
auto rd = reader(sess, file, str::byte_len(file), 0u, 0u,
104+
-1 as char,
96105
filemap.start_pos, filemap.start_pos,
97106
strs, filemap, itr);
98107
rd.init();
@@ -155,7 +164,7 @@ fn is_whitespace(char c) -> bool {
155164
ret c == ' ' || c == '\t' || c == '\r' || c == '\n';
156165
}
157166

158-
fn consume_any_whitespace(&reader rdr) {
167+
fn consume_whitespace_and_comments(&reader rdr) {
159168
while (is_whitespace(rdr.curr())) {
160169
rdr.bump();
161170
}
@@ -170,7 +179,7 @@ fn consume_any_line_comment(&reader rdr) {
170179
rdr.bump();
171180
}
172181
// Restart whitespace munch.
173-
be consume_any_whitespace(rdr);
182+
be consume_whitespace_and_comments(rdr);
174183
}
175184
case ('*') {
176185
rdr.bump();
@@ -207,7 +216,7 @@ fn consume_block_comment(&reader rdr) {
207216
}
208217
}
209218
// restart whitespace munch.
210-
be consume_any_whitespace(rdr);
219+
be consume_whitespace_and_comments(rdr);
211220
}
212221

213222
fn digits_to_string(str s) -> int {
@@ -430,7 +439,7 @@ fn scan_numeric_escape(&reader rdr, uint n_hex_digits) -> char {
430439
fn next_token(&reader rdr) -> token::token {
431440
auto accum_str = "";
432441

433-
consume_any_whitespace(rdr);
442+
consume_whitespace_and_comments(rdr);
434443

435444
if (rdr.is_eof()) { ret token::EOF; }
436445

@@ -720,70 +729,161 @@ fn next_token(&reader rdr) -> token::token {
720729
fail;
721730
}
722731

723-
tag cmnt_ {
724-
cmnt_line(str);
725-
cmnt_block(vec[str]);
732+
733+
tag cmnt_style {
734+
isolated; // No code on either side of each line of the comment
735+
trailing; // Code exists to the left of the comment
736+
mixed; // Code before /* foo */ and after the comment
726737
}
727738

728-
type cmnt = rec(cmnt_ val, uint pos, bool space_after);
739+
type cmnt = rec(cmnt_style style, vec[str] lines, uint pos);
729740

730-
fn consume_whitespace(&reader rdr) -> uint {
731-
auto lines = 0u;
732-
while (is_whitespace(rdr.curr())) {
733-
if (rdr.curr() == '\n') {lines += 1u;}
741+
fn read_to_eol(&reader rdr) -> str {
742+
auto val = "";
743+
while (rdr.curr() != '\n' && !rdr.is_eof()) {
744+
str::push_char(val, rdr.curr());
745+
rdr.bump();
746+
}
747+
if (rdr.curr() == '\n') {
734748
rdr.bump();
749+
} else {
750+
assert rdr.is_eof();
735751
}
736-
ret lines;
752+
ret val;
737753
}
738754

739-
fn read_line_comment(&reader rdr) -> cmnt {
740-
auto p = rdr.get_chpos();
741-
rdr.bump(); rdr.bump();
742-
while (rdr.curr() == ' ') {rdr.bump();}
743-
auto val = "";
744-
while (rdr.curr() != '\n' && !rdr.is_eof()) {
745-
str::push_char(val, rdr.curr());
755+
fn read_one_line_comment(&reader rdr) -> str {
756+
auto val = read_to_eol(rdr);
757+
assert val.(0) == ('/' as u8) && val.(1) == ('/' as u8);
758+
ret val;
759+
}
760+
761+
fn consume_whitespace(&reader rdr) {
762+
while (is_whitespace(rdr.curr()) && !rdr.is_eof()) {
746763
rdr.bump();
747764
}
748-
ret rec(val=cmnt_line(val),
749-
pos=p,
750-
space_after=consume_whitespace(rdr) > 1u);
751765
}
752766

753-
fn read_block_comment(&reader rdr) -> cmnt {
767+
768+
fn consume_non_eol_whitespace(&reader rdr) {
769+
while (is_whitespace(rdr.curr()) &&
770+
rdr.curr() != '\n' && !rdr.is_eof()) {
771+
rdr.bump();
772+
}
773+
}
774+
775+
776+
fn read_line_comments(&reader rdr, bool code_to_the_left) -> cmnt {
777+
log ">>> line comments";
754778
auto p = rdr.get_chpos();
755-
rdr.bump(); rdr.bump();
756-
while (rdr.curr() == ' ') {rdr.bump();}
757779
let vec[str] lines = [];
758-
auto val = "";
759-
auto level = 1;
760-
while (true) {
761-
if (rdr.curr() == '\n') {
762-
vec::push[str](lines, val);
763-
val = "";
764-
consume_whitespace(rdr);
780+
while (rdr.curr() == '/' && rdr.next() == '/') {
781+
lines += [read_one_line_comment(rdr)];
782+
consume_non_eol_whitespace(rdr);
783+
}
784+
log "<<< line comments";
785+
ret rec(style = if (code_to_the_left) { trailing } else { isolated },
786+
lines = lines,
787+
pos=p);
788+
}
789+
790+
fn all_whitespace(&str s, uint begin, uint end) -> bool {
791+
let uint i = begin;
792+
while (i != end) {
793+
if (!is_whitespace(s.(i) as char)) {
794+
ret false;
795+
}
796+
i += 1u;
797+
}
798+
ret true;
799+
}
800+
801+
fn trim_whitespace_prefix_and_push_line(&mutable vec[str] lines,
802+
&str s, uint col) {
803+
auto s1;
804+
if (all_whitespace(s, 0u, col)) {
805+
if (col < str::byte_len(s)) {
806+
s1 = str::slice(s, col, str::byte_len(s));
765807
} else {
766-
if (rdr.curr() == '*' && rdr.next() == '/') {
767-
level -= 1;
768-
if (level == 0) {
769-
rdr.bump(); rdr.bump();
770-
vec::push[str](lines, val);
771-
break;
772-
}
773-
} else if (rdr.curr() == '/' && rdr.next() == '*') {
774-
level += 1;
775-
}
776-
str::push_char(val, rdr.curr());
777-
rdr.bump();
808+
s1 = "";
778809
}
810+
} else {
811+
s1 = s;
812+
}
813+
log "pushing line: " + s1;
814+
lines += [s1];
815+
}
816+
817+
fn read_block_comment(&reader rdr,
818+
bool code_to_the_left) -> cmnt {
819+
log ">>> block comment";
820+
auto p = rdr.get_chpos();
821+
let vec[str] lines = [];
822+
let uint col = rdr.get_col();
823+
rdr.bump();
824+
rdr.bump();
825+
auto curr_line = "/*";
826+
let int level = 1;
827+
while (level > 0) {
828+
log #fmt("=== block comment level %d", level);
779829
if (rdr.is_eof()) {
780-
rdr.err("Unexpected end of file in block comment");
830+
rdr.err("unterminated block comment");
781831
fail;
782832
}
833+
if (rdr.curr() == '\n') {
834+
trim_whitespace_prefix_and_push_line(lines, curr_line, col);
835+
curr_line = "";
836+
rdr.bump();
837+
} else {
838+
str::push_char(curr_line, rdr.curr());
839+
if (rdr.curr() == '/' && rdr.next() == '*') {
840+
rdr.bump();
841+
rdr.bump();
842+
curr_line += "*";
843+
level += 1;
844+
} else {
845+
if (rdr.curr() == '*' && rdr.next() == '/') {
846+
rdr.bump();
847+
rdr.bump();
848+
curr_line += "/";
849+
level -= 1;
850+
} else {
851+
rdr.bump();
852+
}
853+
}
854+
}
783855
}
784-
ret rec(val=cmnt_block(lines),
785-
pos=p,
786-
space_after=consume_whitespace(rdr) > 1u);
856+
if (str::byte_len(curr_line) != 0u) {
857+
trim_whitespace_prefix_and_push_line(lines, curr_line, col);
858+
}
859+
860+
auto style = if (code_to_the_left) { trailing } else { isolated };
861+
consume_non_eol_whitespace(rdr);
862+
if (!rdr.is_eof() &&
863+
rdr.curr() != '\n' &&
864+
vec::len(lines) == 1u) {
865+
style = mixed;
866+
}
867+
log "<<< block comment";
868+
ret rec(style = style, lines = lines, pos=p);
869+
}
870+
871+
fn peeking_at_comment(&reader rdr) -> bool {
872+
ret (rdr.curr() == '/' && rdr.next() == '/') ||
873+
(rdr.curr() == '/' && rdr.next() == '*');
874+
}
875+
876+
fn consume_comment(&reader rdr, bool code_to_the_left,
877+
&mutable vec[cmnt] comments) {
878+
log ">>> consume comment";
879+
if (rdr.curr() == '/' && rdr.next() == '/') {
880+
vec::push[cmnt](comments,
881+
read_line_comments(rdr, code_to_the_left));
882+
} else if (rdr.curr() == '/' && rdr.next() == '*') {
883+
vec::push[cmnt](comments,
884+
read_block_comment(rdr, code_to_the_left));
885+
} else { fail; }
886+
log "<<< consume comment";
787887
}
788888

789889
fn gather_comments(session sess, str path) -> vec[cmnt] {
@@ -793,17 +893,22 @@ fn gather_comments(session sess, str path) -> vec[cmnt] {
793893
let vec[cmnt] comments = [];
794894
while (!rdr.is_eof()) {
795895
while (true) {
796-
consume_whitespace(rdr);
797-
if (rdr.curr() == '/' && rdr.next() == '/') {
798-
vec::push[cmnt](comments, read_line_comment(rdr));
799-
} else if (rdr.curr() == '/' && rdr.next() == '*') {
800-
vec::push[cmnt](comments, read_block_comment(rdr));
801-
} else { break; }
896+
auto code_to_the_left = true;
897+
consume_non_eol_whitespace(rdr);
898+
if (rdr.next() == '\n') {
899+
code_to_the_left = false;
900+
consume_whitespace(rdr);
901+
}
902+
while (peeking_at_comment(rdr)) {
903+
consume_comment(rdr, code_to_the_left, comments);
904+
consume_whitespace(rdr);
905+
}
906+
break;
802907
}
803908
next_token(rdr);
804909
}
805910
ret comments;
806-
}
911+
}
807912

808913

809914
//

src/comp/front/parser.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -164,7 +164,7 @@ fn new_parser(session::session sess,
164164
auto itr = @interner::mk[str](str::hash, str::eq);
165165
auto rdr = lexer::new_reader(sess, srdr, filemap, itr);
166166
// Make sure npos points at first actual token:
167-
lexer::consume_any_whitespace(rdr);
167+
lexer::consume_whitespace_and_comments(rdr);
168168
auto npos = rdr.get_chpos();
169169
ret stdio_parser(sess, env, ftype, lexer::next_token(rdr),
170170
npos, npos, npos, initial_def._1, UNRESTRICTED,

0 commit comments

Comments
 (0)