-
Notifications
You must be signed in to change notification settings - Fork 11
/
Copy pathtsl_parser.y
295 lines (252 loc) · 9.71 KB
/
tsl_parser.y
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
%{
/*
 * Prologue: copied verbatim into the generated parser ahead of the parser
 * code. Declares what the grammar actions and the epilogue functions need.
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include "tsl.h"
#include "tsl_ast.h"
#include "tsl_lexer.h"
/* Forward declarations of the scanner entry point and the error callback. */
int yylex(void);
void yyerror(const char *s);
extern int yylineno;
extern int yycolumn;
/* Defined outside this file; yyerror() passes it the estimated error column.
 * NOTE(review): presumably this is what stores error_pos - confirm. */
extern void set_error_position(int pos);
/* Root of the most recently parsed AST. Set by the `input` rule and reset to
 * NULL at the start of every parse_input_string() call. */
ast_node *ast_root = NULL;
/* Flex buffer helpers, declared here with opaque void* buffer handles. */
extern void *yy_scan_string(const char *);
extern void yy_delete_buffer(void *);
/* API function to parse an input string and return the AST */
ast_node *parse_input_string(const char *input);
/* Global state for error reporting. */
/* Column counter; reset to 1 before each parse and read by yyerror(). */
int yycolumn = 1;
/* Error offset consumed by get_input_string_at_error(); 0 means "no error". */
int error_pos = 0;
/* Heap copy of the last input given to parse_input_string(). */
char *input_string = NULL;
/* Heap copy of the last message given to yyerror(), or NULL. */
char *error_string = NULL;
%}
/*
 * Bison semantic value type.
 *   node - AST subtree built by a grammar rule (every typed nonterminal below)
 *   num  - not referenced by any %token or %type declaration in this section
 *   str  - string carried by the <str> tokens; the `primary` rule frees it
 *          after building a node from it
 */
%union {
ast_node *node;
double num;
char *str;
}
/*
 * Token definitions.
 * The comparison tokens EQ NE LT LE GT GE REQ RNE have no %token line; they
 * are declared only through the %left line in the precedence section below.
 */
%token K_LIKE K_ILIKE K_AND K_OR K_BETWEEN K_IN K_IS K_NULL
%token K_NOT K_TRUE K_FALSE K_LEN K_ANY K_ALL
%token K_SUM /* sum operator keyword */
%token <str> RFC3339 DATE
%token LPAREN RPAREN COMMA
%token PLUS MINUS STAR SLASH PERCENT
%token LBRACKET RBRACKET
%token <str> NUMERIC_LITERAL STRING_LITERAL IDENTIFIER
/*
 * Operator precedence and associativity, listed from lowest to highest.
 * NOTE(review): the rules below already layer operators through separate
 * nonterminals (or_expr > and_expr > comparison_expr > ...) and no rule uses
 * %prec, so confirm whether these declarations still resolve any conflict.
 */
%left K_OR /* lowest precedence */
%left K_AND
%left EQ NE LT LE GT GE REQ RNE
%left K_LIKE K_ILIKE K_IS K_BETWEEN K_IN
%left PLUS MINUS /* + - */
%left STAR SLASH PERCENT /* * / % */
%right K_NOT K_LEN K_ANY K_ALL K_SUM /* prefix operators; sum shares their level */
/* UMINUS is used by unary_expr as the operator tag for unary-minus nodes. */
%right UMINUS /* unary minus */
/* Nonterminal types: every nonterminal yields an AST node. */
%type <node> input expr or_expr and_expr comparison_expr
%type <node> additive_expr multiplicative_expr not_expr unary_expr
%type <node> primary array_elements array opt_array_elements
%start input
%%
/* Start symbol: a single expression. Its AST is published through the global
 * ast_root so parse_input_string() can return it once yyparse() finishes. */
input:
expr { $$ = $1; ast_root = $1; }
;
/* Any expression; alias for the loosest-binding level (or_expr). With no
 * explicit action, Bison's default action ($$ = $1) passes the node through. */
expr:
or_expr
;
/* Logical OR. Left-recursive, so `a OR b OR c` builds ((a OR b) OR c). */
or_expr:
and_expr
| or_expr K_OR and_expr { $$ = ast_create_binary(K_OR, $1, $3); }
;
/* Logical AND; binds tighter than OR. Left-recursive like or_expr. */
and_expr:
comparison_expr
| and_expr K_AND comparison_expr { $$ = ast_create_binary(K_AND, $1, $3); }
;
/*
 * Comparison level: relational, pattern-match, null-test, range and
 * membership operators. All share one level and are left-recursive, so
 * `a < b = c` builds ((a < b) = c).
 *
 * Negated forms (NOT LIKE, NOT ILIKE, IS NOT NULL, NOT BETWEEN, NOT IN) have
 * no node kind of their own: the positive node is built first and then
 * wrapped in a unary K_NOT node.
 *
 * BETWEEN packs its two bounds into a 2-element array node that becomes the
 * right operand of the K_BETWEEN binary node. The scratch pointer array is
 * only needed for the duration of the ast_create_array() call, so it lives
 * on the stack; this avoids an unchecked malloc() per BETWEEN expression.
 */
comparison_expr:
      additive_expr
    | comparison_expr EQ additive_expr      { $$ = ast_create_binary(EQ, $1, $3); }
    | comparison_expr NE additive_expr      { $$ = ast_create_binary(NE, $1, $3); }
    | comparison_expr LT additive_expr      { $$ = ast_create_binary(LT, $1, $3); }
    | comparison_expr LE additive_expr      { $$ = ast_create_binary(LE, $1, $3); }
    | comparison_expr GT additive_expr      { $$ = ast_create_binary(GT, $1, $3); }
    | comparison_expr GE additive_expr      { $$ = ast_create_binary(GE, $1, $3); }
    | comparison_expr REQ additive_expr     { $$ = ast_create_binary(REQ, $1, $3); }
    | comparison_expr RNE additive_expr     { $$ = ast_create_binary(RNE, $1, $3); }
    | comparison_expr K_LIKE additive_expr  { $$ = ast_create_binary(K_LIKE, $1, $3); }
    | comparison_expr K_ILIKE additive_expr { $$ = ast_create_binary(K_ILIKE, $1, $3); }
    | comparison_expr K_NOT K_LIKE additive_expr {
          ast_node *like_expr = ast_create_binary(K_LIKE, $1, $4);
          $$ = ast_create_unary(K_NOT, like_expr);
      }
    | comparison_expr K_NOT K_ILIKE additive_expr {
          ast_node *ilike_expr = ast_create_binary(K_ILIKE, $1, $4);
          $$ = ast_create_unary(K_NOT, ilike_expr);
      }
    | comparison_expr K_IS K_NULL { $$ = ast_create_binary(K_IS, $1, ast_create_null()); }
    | comparison_expr K_IS K_NOT K_NULL {
          ast_node *is_null = ast_create_binary(K_IS, $1, ast_create_null());
          $$ = ast_create_unary(K_NOT, is_null);
      }
    | comparison_expr K_BETWEEN additive_expr K_AND additive_expr {
          /* [low, high] bounds, handed to the array node in source order. */
          ast_node *bounds[2] = { $3, $5 };
          ast_node *range = ast_create_array(2, bounds);
          $$ = ast_create_binary(K_BETWEEN, $1, range);
      }
    | comparison_expr K_NOT K_BETWEEN additive_expr K_AND additive_expr {
          ast_node *bounds[2] = { $4, $6 };
          ast_node *range = ast_create_array(2, bounds);
          ast_node *between = ast_create_binary(K_BETWEEN, $1, range);
          $$ = ast_create_unary(K_NOT, between);
      }
    | comparison_expr K_IN additive_expr { $$ = ast_create_binary(K_IN, $1, $3); }
    | comparison_expr K_NOT K_IN additive_expr {
          ast_node *in_expr = ast_create_binary(K_IN, $1, $4);
          $$ = ast_create_unary(K_NOT, in_expr);
      }
    ;
/* Binary + and -. Left-recursive, so `a - b - c` builds ((a - b) - c). */
additive_expr:
multiplicative_expr
| additive_expr PLUS multiplicative_expr { $$ = ast_create_binary(PLUS, $1, $3); }
| additive_expr MINUS multiplicative_expr { $$ = ast_create_binary(MINUS, $1, $3); }
;
/* Binary *, / and %. Binds tighter than + and -; left-recursive. */
multiplicative_expr:
not_expr
| multiplicative_expr STAR not_expr { $$ = ast_create_binary(STAR, $1, $3); }
| multiplicative_expr SLASH not_expr { $$ = ast_create_binary(SLASH, $1, $3); }
| multiplicative_expr PERCENT not_expr { $$ = ast_create_binary(PERCENT, $1, $3); }
;
/*
 * Prefix keyword operators: NOT, LEN, ANY, ALL, SUM. Right-recursive, so they
 * can be stacked (e.g. NOT NOT x). Each builds a unary node tagged with its
 * keyword token.
 * NOTE(review): this level sits below multiplicative_expr, so the operand of
 * NOT is only a not_expr; `NOT a = b` therefore groups as `(NOT a) = b`.
 * Confirm that this is the intended binding.
 */
not_expr:
unary_expr
| K_NOT not_expr { $$ = ast_create_unary(K_NOT, $2); }
| K_LEN not_expr { $$ = ast_create_unary(K_LEN, $2); }
| K_ANY not_expr { $$ = ast_create_unary(K_ANY, $2); }
| K_ALL not_expr { $$ = ast_create_unary(K_ALL, $2); }
| K_SUM not_expr { $$ = ast_create_unary(K_SUM, $2); } /* sum is handled like the other prefix operators */
;
/*
 * Sign operators, parenthesised sub-expressions and array literals.
 * Unary minus is tagged UMINUS (not MINUS) so it is distinguishable from
 * binary subtraction in the AST. Parentheses add no node of their own.
 */
unary_expr:
primary
| MINUS unary_expr { $$ = ast_create_unary(UMINUS, $2); }
| PLUS unary_expr { $$ = $2; } /* unary plus is a no-op */
| LPAREN expr RPAREN { $$ = $2; }
| array { $$ = $1; }
;
/* Array literal: a bracketed, possibly empty, element list. */
array:
LBRACKET opt_array_elements RBRACKET { $$ = $2; } /* only square brackets for arrays */
;
/* Contents of an array literal: nothing, a list, or a list followed by one
 * trailing comma. Always yields an array node, even when empty. */
opt_array_elements:
/* empty */ { $$ = ast_create_array(0, NULL); } /* Empty array */
| array_elements { $$ = $1; }
| array_elements COMMA { $$ = $1; } /* Trailing comma */
;
/*
 * Comma-separated element list; yields an array node.
 *
 * First alternative: wraps a single expression in a 1-element array node.
 *
 * Second alternative: builds a new array node one element longer than $1.
 * The existing elements are cloned because $1 is released with ast_free()
 * afterwards. The newly parsed element $3 is handed over directly, exactly as
 * the first alternative hands over $1; cloning it as well would leave the
 * original $3 subtree with no owner and leak it on every append.
 * NOTE(review): this relies on ast_create_array() keeping the node pointers
 * it is given, which the first alternative already relies on - confirm.
 *
 * On allocation failure the operands are released, the error is reported
 * through yyerror(), and the parse is aborted.
 */
array_elements:
      expr {
          /* Scratch array is only read during the call, so the stack is enough. */
          ast_node *single[1] = { $1 };
          $$ = ast_create_array(1, single);
      }
    | array_elements COMMA expr {
          ast_node **elements = malloc(($1->data.array.size + 1) * sizeof(ast_node*));
          if (elements == NULL) {
              ast_free($1);
              ast_free($3);
              yyerror("out of memory");
              YYABORT;
          }
          /* Copy existing elements; the originals die with $1 below. */
          for (int i = 0; i < $1->data.array.size; i++) {
              elements[i] = ast_clone($1->data.array.elements[i]);
          }
          elements[$1->data.array.size] = $3;
          $$ = ast_create_array($1->data.array.size + 1, elements);
          free(elements);
          ast_free($1);
      }
    ;
/*
 * Leaf values: literals, identifiers, timestamps, dates and booleans.
 * For the <str> tokens the string is freed as soon as the node constructor
 * returns, so the constructors must not keep the pointer.
 * NOTE(review): presumably they copy or convert the text - confirm in tsl_ast.
 */
primary:
NUMERIC_LITERAL { $$ = ast_create_number($1); free($1); }
| STRING_LITERAL { $$ = ast_create_string($1); free($1); }
| IDENTIFIER { $$ = ast_create_identifier($1); free($1); }
| RFC3339 { $$ = ast_create_rfc3339($1); free($1); }
| DATE { $$ = ast_create_date($1); free($1); }
| K_TRUE { $$ = ast_create_boolean(1); }
| K_FALSE { $$ = ast_create_boolean(0); }
;
%%
/*
 * Parse `input` and return the root of the resulting AST.
 *
 * input   NUL-terminated expression text. A private copy is kept in
 *         input_string so error context can still be produced after the
 *         caller's buffer is gone.
 * returns the AST root on success (ast_root is left pointing at the same
 *         node), or NULL when `input` is NULL or parsing fails. After a
 *         failure, get_error_string() and get_input_string_at_error()
 *         describe the problem.
 *
 * Each call discards the error state and input copy of the previous call.
 * input_string is intentionally kept alive after returning; release it with
 * cleanup_parser_memory().
 */
ast_node *parse_input_string(const char *input) {
    /* Reset everything left over from a previous parse. */
    ast_root = NULL;
    free(input_string);
    input_string = NULL;
    free(error_string);
    error_string = NULL;
    yycolumn = 1;
    error_pos = 0;

    /* yy_scan_string() and strdup() both require a real string. */
    if (input == NULL) {
        return NULL;
    }

    void *buffer = yy_scan_string(input);
    input_string = strdup(input);

    int result = yyparse();
    yy_delete_buffer(buffer);

    if (result != 0) {
        if (ast_root) {
            ast_free(ast_root);
            /* Do not leave the global pointing at freed memory. */
            ast_root = NULL;
        }
        return NULL;
    }
    return ast_root;
}
/*
 * Release the heap copies kept for error reporting (the saved input and the
 * last error message) and reset both pointers to NULL. Safe to call more
 * than once.
 */
void cleanup_parser_memory(void) {
    /* free(NULL) is a no-op, so the pointers need no guarding. */
    free(input_string);
    input_string = NULL;

    free(error_string);
    error_string = NULL;
}
/*
 * Error callback invoked with a diagnostic message `s`.
 * Reports the estimated column of the offending token through
 * set_error_position() and stores a heap copy of `s` in error_string,
 * replacing any earlier message.
 */
void yyerror(const char *s) {
    /* Step back 2 columns to account for the token start. */
    const int token_column = yycolumn - 2;
    set_error_position(token_column);

    free(error_string);
    error_string = strdup(s);
}
/*
 * Return the message recorded by the last yyerror() call, or the literal
 * "Unknown error" when none is stored. The returned pointer is owned by the
 * parser; callers must not free it.
 */
const char* get_error_string(void) {
    if (error_string == NULL) {
        return "Unknown error";
    }
    return error_string;
}
/*
 * Return the whitespace-delimited token of the saved input that surrounds
 * the recorded error position.
 *
 * returns "" when no input is saved, no error position is recorded
 *         (error_pos <= 0), or the saved input is empty. Otherwise a pointer
 *         to a static buffer holding the token, truncated to fit the buffer.
 *         The buffer is overwritten by the next call, so callers that need
 *         to keep the text must copy it.
 *
 * error_pos is clamped to the input length before use, so a position
 * reported past the end of the text cannot index outside the string.
 * Characters are converted to unsigned char before isspace(), whose behavior
 * is undefined for negative values (e.g. non-ASCII bytes where char is
 * signed).
 */
const char* get_input_string_at_error(void) {
    static char context[512];

    if (!input_string || error_pos <= 0) {
        return "";
    }

    int len = (int)strlen(input_string);
    if (len == 0) {
        return "";
    }

    /* Keep every index below inside [0, len]. */
    int pos = error_pos < len ? error_pos : len;

    // Skip whitespace backwards to find the token the error belongs to
    int i = pos - 1;
    while (i > 0 && isspace((unsigned char)input_string[i])) {
        i--;
    }

    // Find token start
    int start = i;
    while (start > 0 && !isspace((unsigned char)input_string[start - 1])) {
        start--;
    }

    // Find token end
    int end = pos;
    while (end < len && !isspace((unsigned char)input_string[end])) {
        end++;
    }

    // Extract the token
    snprintf(context, sizeof(context), "%.*s",
             end - start, input_string + start);
    return context;
}