@@ -22,6 +22,7 @@ state type reader = state obj {
22
22
fn get_mark_chpos ( ) -> uint ;
23
23
fn get_interner ( ) -> @interner:: interner [ str ] ;
24
24
fn get_chpos ( ) -> uint ;
25
+ fn get_col ( ) -> uint ;
25
26
fn get_filemap ( ) -> codemap:: filemap ;
26
27
fn err ( str m) ;
27
28
} ;
@@ -33,6 +34,7 @@ fn new_reader(session sess, io::reader rdr,
33
34
state obj reader ( session sess,
34
35
str file,
35
36
uint len,
37
+ mutable uint col,
36
38
mutable uint pos,
37
39
mutable char ch,
38
40
mutable uint mark_chpos,
@@ -68,9 +70,11 @@ fn new_reader(session sess, io::reader rdr,
68
70
69
71
fn bump ( ) {
70
72
if ( pos < len) {
73
+ col += 1 u;
71
74
chpos += 1 u;
72
75
if ( ch == '\n' ) {
73
76
codemap:: next_line ( fm, chpos) ;
77
+ col = 0 u;
74
78
}
75
79
auto next = str:: char_range_at ( file, pos) ;
76
80
pos = next. _1 ;
@@ -82,6 +86,10 @@ fn new_reader(session sess, io::reader rdr,
82
86
83
87
fn get_interner ( ) -> @interner:: interner [ str ] { ret itr; }
84
88
89
// Returns the reader's column counter. In the visible part of bump() the
// counter is incremented on each advance and set back to 0 when the
// character being advanced past is '\n'.
+ fn get_col ( ) -> uint {
90
+ ret col;
91
+ }
92
+
85
93
fn get_filemap ( ) -> codemap:: filemap {
86
94
ret fm;
87
95
}
@@ -92,7 +100,8 @@ fn new_reader(session sess, io::reader rdr,
92
100
}
93
101
auto file = str:: unsafe_from_bytes ( rdr. read_whole_stream ( ) ) ;
94
102
let vec[ str] strs = [ ] ;
95
- auto rd = reader ( sess, file, str:: byte_len ( file) , 0 u, -1 as char ,
103
+ auto rd = reader ( sess, file, str:: byte_len ( file) , 0 u, 0 u,
104
+ -1 as char ,
96
105
filemap. start_pos , filemap. start_pos ,
97
106
strs, filemap, itr) ;
98
107
rd. init ( ) ;
@@ -155,7 +164,7 @@ fn is_whitespace(char c) -> bool {
155
164
ret c == ' ' || c == '\t' || c == '\r' || c == '\n' ;
156
165
}
157
166
158
- fn consume_any_whitespace ( & reader rdr) {
167
+ fn consume_whitespace_and_comments ( & reader rdr) {
159
168
while ( is_whitespace ( rdr. curr ( ) ) ) {
160
169
rdr. bump ( ) ;
161
170
}
@@ -170,7 +179,7 @@ fn consume_any_line_comment(&reader rdr) {
170
179
rdr. bump ( ) ;
171
180
}
172
181
// Restart whitespace munch.
173
- be consume_any_whitespace ( rdr) ;
182
+ be consume_whitespace_and_comments ( rdr) ;
174
183
}
175
184
case ( '*' ) {
176
185
rdr. bump ( ) ;
@@ -207,7 +216,7 @@ fn consume_block_comment(&reader rdr) {
207
216
}
208
217
}
209
218
// restart whitespace munch.
210
- be consume_any_whitespace ( rdr) ;
219
+ be consume_whitespace_and_comments ( rdr) ;
211
220
}
212
221
213
222
fn digits_to_string ( str s) -> int {
@@ -430,7 +439,7 @@ fn scan_numeric_escape(&reader rdr, uint n_hex_digits) -> char {
430
439
fn next_token ( & reader rdr) -> token:: token {
431
440
auto accum_str = "" ;
432
441
433
- consume_any_whitespace ( rdr) ;
442
+ consume_whitespace_and_comments ( rdr) ;
434
443
435
444
if ( rdr. is_eof ( ) ) { ret token:: EOF ; }
436
445
@@ -720,70 +729,161 @@ fn next_token(&reader rdr) -> token::token {
720
729
fail;
721
730
}
722
731
723
- tag cmnt_ {
724
- cmnt_line( str) ;
725
- cmnt_block ( vec[ str] ) ;
732
+
733
// Placement of a gathered comment relative to the code around it; stored in
// the `style` field of `cmnt`.
+ tag cmnt_style {
734
+ isolated; // No code on either side of each line of the comment
735
+ trailing; // Code exists to the left of the comment
736
+ mixed; // Code before /* foo */ and after the comment
726
737
}
727
738
728
- type cmnt = rec ( cmnt_ val , uint pos , bool space_after ) ;
739
// A gathered comment: its placement style, its text split into lines, and
// the reader's get_chpos() value sampled where the comment began.
+ type cmnt = rec ( cmnt_style style , vec [ str ] lines , uint pos ) ;
729
740
730
- fn consume_whitespace ( & reader rdr) -> uint {
731
- auto lines = 0 u;
732
- while ( is_whitespace ( rdr. curr ( ) ) ) {
733
- if ( rdr. curr ( ) == '\n' ) { lines += 1 u; }
741
+ fn read_to_eol ( & reader rdr) -> str {
742
+ auto val = "" ;
743
+ while ( rdr. curr ( ) != '\n' && !rdr. is_eof ( ) ) {
744
+ str:: push_char ( val, rdr. curr ( ) ) ;
745
+ rdr. bump ( ) ;
746
+ }
747
+ if ( rdr. curr ( ) == '\n' ) {
734
748
rdr. bump ( ) ;
749
+ } else {
750
+ assert rdr. is_eof ( ) ;
735
751
}
736
- ret lines ;
752
+ ret val ;
737
753
}
738
754
739
- fn read_line_comment ( & reader rdr) -> cmnt {
740
- auto p = rdr. get_chpos ( ) ;
741
- rdr. bump ( ) ; rdr. bump ( ) ;
742
- while ( rdr. curr ( ) == ' ' ) { rdr. bump ( ) ; }
743
- auto val = "" ;
744
- while ( rdr. curr ( ) != '\n' && !rdr. is_eof ( ) ) {
745
- str:: push_char ( val, rdr. curr ( ) ) ;
755
+ fn read_one_line_comment ( & reader rdr) -> str {
756
+ auto val = read_to_eol ( rdr) ;
757
+ assert val. ( 0 ) == ( '/' as u8 ) && val. ( 1 ) == ( '/' as u8 ) ;
758
+ ret val;
759
+ }
760
+
761
+ fn consume_whitespace ( & reader rdr) {
762
+ while ( is_whitespace ( rdr. curr ( ) ) && !rdr. is_eof ( ) ) {
746
763
rdr. bump ( ) ;
747
764
}
748
- ret rec( val=cmnt_line ( val) ,
749
- pos=p,
750
- space_after=consume_whitespace ( rdr) > 1 u) ;
751
765
}
752
766
753
- fn read_block_comment ( & reader rdr) -> cmnt {
767
+
768
+ fn consume_non_eol_whitespace ( & reader rdr) {
769
+ while ( is_whitespace ( rdr. curr ( ) ) &&
770
+ rdr. curr ( ) != '\n' && !rdr. is_eof ( ) ) {
771
+ rdr. bump ( ) ;
772
+ }
773
+ }
774
+
775
+
776
+ fn read_line_comments ( & reader rdr, bool code_to_the_left ) -> cmnt {
777
+ log ">>> line comments" ;
754
778
auto p = rdr. get_chpos ( ) ;
755
- rdr. bump ( ) ; rdr. bump ( ) ;
756
- while ( rdr. curr ( ) == ' ' ) { rdr. bump ( ) ; }
757
779
let vec[ str] lines = [ ] ;
758
- auto val = "" ;
759
- auto level = 1 ;
760
- while ( true ) {
761
- if ( rdr. curr ( ) == '\n' ) {
762
- vec:: push[ str] ( lines, val) ;
763
- val = "" ;
764
- consume_whitespace ( rdr) ;
780
+ while ( rdr. curr ( ) == '/' && rdr. next ( ) == '/' ) {
781
+ lines += [ read_one_line_comment ( rdr) ] ;
782
+ consume_non_eol_whitespace ( rdr) ;
783
+ }
784
+ log "<<< line comments" ;
785
+ ret rec( style = if ( code_to_the_left) { trailing } else { isolated } ,
786
+ lines = lines,
787
+ pos=p) ;
788
+ }
789
+
790
+ fn all_whitespace ( & str s, uint begin , uint end) -> bool {
791
+ let uint i = begin;
792
+ while ( i != end) {
793
+ if ( !is_whitespace ( s. ( i) as char ) ) {
794
+ ret false ;
795
+ }
796
+ i += 1 u;
797
+ }
798
+ ret true;
799
+ }
800
+
801
+ fn trim_whitespace_prefix_and_push_line ( & mutable vec[ str] lines ,
802
+ & str s, uint col) {
803
+ auto s1;
804
+ if ( all_whitespace ( s, 0 u, col) ) {
805
+ if ( col < str:: byte_len ( s) ) {
806
+ s1 = str:: slice ( s, col, str:: byte_len ( s) ) ;
765
807
} else {
766
- if ( rdr. curr ( ) == '*' && rdr. next ( ) == '/' ) {
767
- level -= 1 ;
768
- if ( level == 0 ) {
769
- rdr. bump ( ) ; rdr. bump ( ) ;
770
- vec:: push[ str] ( lines, val) ;
771
- break ;
772
- }
773
- } else if ( rdr. curr ( ) == '/' && rdr. next ( ) == '*' ) {
774
- level += 1 ;
775
- }
776
- str:: push_char ( val, rdr. curr ( ) ) ;
777
- rdr. bump ( ) ;
808
+ s1 = "" ;
778
809
}
810
+ } else {
811
+ s1 = s;
812
+ }
813
+ log "pushing line: " + s1;
814
+ lines += [ s1] ;
815
+ }
816
+
817
+ fn read_block_comment ( & reader rdr,
818
+ bool code_to_the_left ) -> cmnt {
819
+ log ">>> block comment" ;
820
+ auto p = rdr. get_chpos ( ) ;
821
+ let vec[ str] lines = [ ] ;
822
+ let uint col = rdr. get_col ( ) ;
823
+ rdr. bump ( ) ;
824
+ rdr. bump ( ) ;
825
+ auto curr_line = "/*" ;
826
+ let int level = 1 ;
827
+ while ( level > 0 ) {
828
+ log #fmt( "=== block comment level %d" , level) ;
779
829
if ( rdr. is_eof ( ) ) {
780
- rdr. err ( "Unexpected end of file in block comment" ) ;
830
+ rdr. err ( "unterminated block comment" ) ;
781
831
fail;
782
832
}
833
+ if ( rdr. curr ( ) == '\n' ) {
834
+ trim_whitespace_prefix_and_push_line ( lines, curr_line, col) ;
835
+ curr_line = "" ;
836
+ rdr. bump ( ) ;
837
+ } else {
838
+ str:: push_char ( curr_line, rdr. curr ( ) ) ;
839
+ if ( rdr. curr ( ) == '/' && rdr. next ( ) == '*' ) {
840
+ rdr. bump ( ) ;
841
+ rdr. bump ( ) ;
842
+ curr_line += "*" ;
843
+ level += 1 ;
844
+ } else {
845
+ if ( rdr. curr ( ) == '*' && rdr. next ( ) == '/' ) {
846
+ rdr. bump ( ) ;
847
+ rdr. bump ( ) ;
848
+ curr_line += "/" ;
849
+ level -= 1 ;
850
+ } else {
851
+ rdr. bump ( ) ;
852
+ }
853
+ }
854
+ }
783
855
}
784
- ret rec( val=cmnt_block ( lines) ,
785
- pos=p,
786
- space_after=consume_whitespace ( rdr) > 1 u) ;
856
+ if ( str:: byte_len ( curr_line) != 0 u) {
857
+ trim_whitespace_prefix_and_push_line ( lines, curr_line, col) ;
858
+ }
859
+
860
+ auto style = if ( code_to_the_left) { trailing } else { isolated } ;
861
+ consume_non_eol_whitespace ( rdr) ;
862
+ if ( !rdr. is_eof ( ) &&
863
+ rdr. curr ( ) != '\n' &&
864
+ vec:: len ( lines) == 1 u) {
865
+ style = mixed;
866
+ }
867
+ log "<<< block comment" ;
868
+ ret rec( style = style, lines = lines, pos=p) ;
869
+ }
870
+
871
+ fn peeking_at_comment ( & reader rdr) -> bool {
872
+ ret ( rdr. curr ( ) == '/' && rdr. next ( ) == '/' ) ||
873
+ ( rdr. curr ( ) == '/' && rdr. next ( ) == '*' ) ;
874
+ }
875
+
876
+ fn consume_comment ( & reader rdr, bool code_to_the_left ,
877
+ & mutable vec[ cmnt] comments ) {
878
+ log ">>> consume comment" ;
879
+ if ( rdr. curr ( ) == '/' && rdr. next ( ) == '/' ) {
880
+ vec:: push[ cmnt] ( comments,
881
+ read_line_comments ( rdr, code_to_the_left) ) ;
882
+ } else if ( rdr. curr ( ) == '/' && rdr. next ( ) == '*' ) {
883
+ vec:: push[ cmnt] ( comments,
884
+ read_block_comment ( rdr, code_to_the_left) ) ;
885
+ } else { fail; }
886
+ log "<<< consume comment" ;
787
887
}
788
888
789
889
fn gather_comments ( session sess, str path ) -> vec[ cmnt ] {
@@ -793,17 +893,22 @@ fn gather_comments(session sess, str path) -> vec[cmnt] {
793
893
let vec[ cmnt] comments = [ ] ;
794
894
while ( !rdr. is_eof ( ) ) {
795
895
while ( true ) {
796
- consume_whitespace ( rdr) ;
797
- if ( rdr. curr ( ) == '/' && rdr. next ( ) == '/' ) {
798
- vec:: push[ cmnt] ( comments, read_line_comment ( rdr) ) ;
799
- } else if ( rdr. curr ( ) == '/' && rdr. next ( ) == '*' ) {
800
- vec:: push[ cmnt] ( comments, read_block_comment ( rdr) ) ;
801
- } else { break ; }
896
+ auto code_to_the_left = true ;
897
+ consume_non_eol_whitespace ( rdr) ;
898
+ if ( rdr. next ( ) == '\n' ) {
899
+ code_to_the_left = false ;
900
+ consume_whitespace ( rdr) ;
901
+ }
902
+ while ( peeking_at_comment ( rdr) ) {
903
+ consume_comment ( rdr, code_to_the_left, comments) ;
904
+ consume_whitespace ( rdr) ;
905
+ }
906
+ break ;
802
907
}
803
908
next_token ( rdr) ;
804
909
}
805
910
ret comments;
806
- }
911
+ }
807
912
808
913
809
914
//
0 commit comments