// 1. MOVi32imm + ANDWrr ==> ANDWri + ANDWri
//    MOVi64imm + ANDXrr ==> ANDXri + ANDXri
//
+// 2. MOVi32imm + ADDWrr ==> ADDWri + ADDWri
+//    MOVi64imm + ADDXrr ==> ADDXri + ADDXri
+//
+// 3. MOVi32imm + SUBWrr ==> SUBWri + SUBWri
+//    MOVi64imm + SUBXrr ==> SUBXri + SUBXri
+//
// The mov pseudo instruction could be expanded to multiple mov instructions
// later. In this case, we could try to split the constant operand of mov
-// instruction into two bitmask immediates. It makes two AND instructions
-// intead of multiple `mov` + `and` instructions.
+// instruction into two immediates which can be directly encoded into
+// *Wri/*Xri instructions. It makes two AND/ADD/SUB instructions instead of
+// multiple `mov` + `and/add/sub` instructions.
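+//
+// For example (schematic, with a made-up constant):
+//   %1 = MOVi32imm 0x123456
+//   %2 = ADDWrr %0, %1
+// becomes
+//   %t = ADDWri %0, 0x123, 12   (adds 0x123 << 12)
+//   %2 = ADDWri %t, 0x456, 0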
//
-// 2. Remove redundant ORRWrs which is generated by zero-extend.
+// 4. Remove redundant ORRWrs which is generated by zero-extend.
//
//   %3:gpr32 = ORRWrs $wzr, %2, 0
//   %4:gpr64 = SUBREG_TO_REG 0, %3, %subreg.sub_32
@@ -51,6 +58,12 @@ struct AArch64MIPeepholeOpt : public MachineFunctionPass {
  MachineLoopInfo *MLI;
  MachineRegisterInfo *MRI;

+  bool checkMovImmInstr(MachineInstr &MI, MachineInstr *&MovMI,
+                        MachineInstr *&SubregToRegMI);
+
+  template <typename T>
+  bool visitADDSUB(MachineInstr &MI,
+                   SmallSetVector<MachineInstr *, 8> &ToBeRemoved, bool IsAdd);
  template <typename T>
  bool visitAND(MachineInstr &MI,
                SmallSetVector<MachineInstr *, 8> &ToBeRemoved);
@@ -131,36 +144,9 @@ bool AArch64MIPeepholeOpt::visitAND(
  assert((RegSize == 32 || RegSize == 64) &&
         "Invalid RegSize for AND bitmask peephole optimization");

-  // Check whether AND's MBB is in loop and the AND is loop invariant.
-  MachineBasicBlock *MBB = MI.getParent();
-  MachineLoop *L = MLI->getLoopFor(MBB);
-  if (L && !L->isLoopInvariant(MI))
-    return false;
-
-  // Check whether AND's operand is MOV with immediate.
-  MachineInstr *MovMI = MRI->getUniqueVRegDef(MI.getOperand(2).getReg());
-  if (!MovMI)
-    return false;
-
-  MachineInstr *SubregToRegMI = nullptr;
-  // If it is SUBREG_TO_REG, check its operand.
-  if (MovMI->getOpcode() == TargetOpcode::SUBREG_TO_REG) {
-    SubregToRegMI = MovMI;
-    MovMI = MRI->getUniqueVRegDef(MovMI->getOperand(2).getReg());
-    if (!MovMI)
-      return false;
-  }
-
-  if (MovMI->getOpcode() != AArch64::MOVi32imm &&
-      MovMI->getOpcode() != AArch64::MOVi64imm)
-    return false;
-
-  // If the MOV has multiple uses, do not split the immediate because it causes
-  // more instructions.
-  if (!MRI->hasOneUse(MovMI->getOperand(0).getReg()))
-    return false;
-
-  if (SubregToRegMI && !MRI->hasOneUse(SubregToRegMI->getOperand(0).getReg()))
+  // Perform several essential checks against current MI.
+  MachineInstr *MovMI = nullptr, *SubregToRegMI = nullptr;
+  if (!checkMovImmInstr(MI, MovMI, SubregToRegMI))
    return false;

// Split the bitmask immediate into two.
@@ -177,6 +163,7 @@ bool AArch64MIPeepholeOpt::visitAND(
  // Create new AND MIs.
  DebugLoc DL = MI.getDebugLoc();
+  MachineBasicBlock *MBB = MI.getParent();
  const TargetRegisterClass *ANDImmRC =
      (RegSize == 32) ? &AArch64::GPR32spRegClass : &AArch64::GPR64spRegClass;
  Register DstReg = MI.getOperand(0).getReg();
@@ -251,6 +238,139 @@ bool AArch64MIPeepholeOpt::visitORR(
  return true;
}

+template <typename T>
+static bool splitAddSubImm(T Imm, unsigned RegSize, T &Imm0, T &Imm1) {
+  // The immediate must be in the form of ((imm0 << 12) + imm1), in which both
+  // imm0 and imm1 are non-zero 12-bit unsigned ints.
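+  // For example, 0x123456 splits into imm0 = 0x123 and imm1 = 0x456.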
+  if ((Imm & 0xfff000) == 0 || (Imm & 0xfff) == 0 ||
+      (Imm & ~static_cast<T>(0xffffff)) != 0)
+    return false;
+
+  // The immediate must not be composable via a single instruction; otherwise
+  // splitting gains nothing over the expanded mov sequence.
+  SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
+  AArch64_IMM::expandMOVImm(Imm, RegSize, Insn);
+  if (Insn.size() == 1)
+    return false;
+
+  // Split Imm into (Imm0 << 12) + Imm1.
+  Imm0 = (Imm >> 12) & 0xfff;
+  Imm1 = Imm & 0xfff;
+  return true;
+}
+
+template <typename T>
+bool AArch64MIPeepholeOpt::visitADDSUB(
+    MachineInstr &MI, SmallSetVector<MachineInstr *, 8> &ToBeRemoved,
+    bool IsAdd) {
+  // Try below transformation.
+  //
+  // MOVi32imm + ADDWrr ==> ADDWri + ADDWri
+  // MOVi64imm + ADDXrr ==> ADDXri + ADDXri
+  //
+  // MOVi32imm + SUBWrr ==> SUBWri + SUBWri
+  // MOVi64imm + SUBXrr ==> SUBXri + SUBXri
+  //
+  // The mov pseudo instruction could be expanded to multiple mov instructions
+  // later. Let's try to split the constant operand of mov instruction into two
+  // legal add/sub immediates. It makes only two ADD/SUB instructions instead
+  // of multiple `mov` + `add/sub` instructions.
+
+  unsigned RegSize = sizeof(T) * 8;
+  assert((RegSize == 32 || RegSize == 64) &&
+         "Invalid RegSize for legal add/sub immediate peephole optimization");
+
+  // Perform several essential checks against current MI.
+  MachineInstr *MovMI, *SubregToRegMI;
+  if (!checkMovImmInstr(MI, MovMI, SubregToRegMI))
+    return false;
+
+  // Split the immediate into Imm0 and Imm1, and calculate the Opcode.
+  T Imm = static_cast<T>(MovMI->getOperand(1).getImm()), Imm0, Imm1;
+  unsigned Opcode;
+  if (splitAddSubImm(Imm, RegSize, Imm0, Imm1)) {
+    if (IsAdd)
+      Opcode = RegSize == 32 ? AArch64::ADDWri : AArch64::ADDXri;
+    else
+      Opcode = RegSize == 32 ? AArch64::SUBWri : AArch64::SUBXri;
+  } else if (splitAddSubImm(-Imm, RegSize, Imm0, Imm1)) {
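+    // -Imm split instead, so flip the operation: x + Imm == x - (-Imm), and
+    // x - Imm == x + (-Imm).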
+    if (IsAdd)
+      Opcode = RegSize == 32 ? AArch64::SUBWri : AArch64::SUBXri;
+    else
+      Opcode = RegSize == 32 ? AArch64::ADDWri : AArch64::ADDXri;
+  } else {
+    return false;
+  }
+
+  // Create new ADD/SUB MIs.
+  DebugLoc DL = MI.getDebugLoc();
+  MachineBasicBlock *MBB = MI.getParent();
+  const TargetRegisterClass *RC =
+      (RegSize == 32) ? &AArch64::GPR32spRegClass : &AArch64::GPR64spRegClass;
+  Register DstReg = MI.getOperand(0).getReg();
+  Register SrcReg = MI.getOperand(1).getReg();
+  Register TmpReg = MRI->createVirtualRegister(RC);
+
+  MRI->constrainRegClass(SrcReg, RC);
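+  // The first instruction applies Imm0 with a 12-bit left shift (the trailing
+  // immediate operand is the shift amount); the second applies the low 12
+  // bits with no shift.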
+  BuildMI(*MBB, MI, DL, TII->get(Opcode), TmpReg)
+      .addReg(SrcReg)
+      .addImm(Imm0)
+      .addImm(12);
+
+  MRI->constrainRegClass(DstReg, RC);
+  BuildMI(*MBB, MI, DL, TII->get(Opcode), DstReg)
+      .addReg(TmpReg)
+      .addImm(Imm1)
+      .addImm(0);
+
+  // Record the MIs that need to be removed.
+  ToBeRemoved.insert(&MI);
+  if (SubregToRegMI)
+    ToBeRemoved.insert(SubregToRegMI);
+  ToBeRemoved.insert(MovMI);
+
+  return true;
+}
+
+// Checks if the corresponding MOV immediate instruction is applicable for
+// this peephole optimization.
+bool AArch64MIPeepholeOpt::checkMovImmInstr(MachineInstr &MI,
+                                            MachineInstr *&MovMI,
+                                            MachineInstr *&SubregToRegMI) {
+  // Check whether the current MBB is in a loop and the MI is loop invariant.
+  MachineBasicBlock *MBB = MI.getParent();
+  MachineLoop *L = MLI->getLoopFor(MBB);
+  if (L && !L->isLoopInvariant(MI))
+    return false;
+
+  // Check whether the current MI's operand is a MOV with immediate.
+  MovMI = MRI->getUniqueVRegDef(MI.getOperand(2).getReg());
+  if (!MovMI)
+    return false;
+
+  // If it is SUBREG_TO_REG, check its operand.
+  SubregToRegMI = nullptr;
+  if (MovMI->getOpcode() == TargetOpcode::SUBREG_TO_REG) {
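+    // A 32-bit mov feeding a 64-bit use is wrapped in SUBREG_TO_REG, as in
+    // the zero-extend pattern shown in the file header; look through it.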
+    SubregToRegMI = MovMI;
+    MovMI = MRI->getUniqueVRegDef(MovMI->getOperand(2).getReg());
+    if (!MovMI)
+      return false;
+  }
+
+  if (MovMI->getOpcode() != AArch64::MOVi32imm &&
+      MovMI->getOpcode() != AArch64::MOVi64imm)
+    return false;
+
+  // If the MOV has multiple uses, do not split the immediate because it causes
+  // more instructions.
+  if (!MRI->hasOneUse(MovMI->getOperand(0).getReg()))
+    return false;
+  if (SubregToRegMI && !MRI->hasOneUse(SubregToRegMI->getOperand(0).getReg()))
+    return false;
+
+  // It is OK to perform this peephole optimization.
+  return true;
+}
+
bool AArch64MIPeepholeOpt::runOnMachineFunction(MachineFunction &MF) {
  if (skipFunction(MF.getFunction()))
    return false;
@@ -278,6 +398,19 @@ bool AArch64MIPeepholeOpt::runOnMachineFunction(MachineFunction &MF) {
        break;
      case AArch64::ORRWrs:
        Changed = visitORR(MI, ToBeRemoved);
+        break;
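+      // W-form opcodes operate on 32-bit registers and X-form on 64-bit ones,
+      // hence the uint32_t/uint64_t template arguments.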
+      case AArch64::ADDWrr:
+        Changed = visitADDSUB<uint32_t>(MI, ToBeRemoved, true);
+        break;
+      case AArch64::SUBWrr:
+        Changed = visitADDSUB<uint32_t>(MI, ToBeRemoved, false);
+        break;
+      case AArch64::ADDXrr:
+        Changed = visitADDSUB<uint64_t>(MI, ToBeRemoved, true);
+        break;
+      case AArch64::SUBXrr:
+        Changed = visitADDSUB<uint64_t>(MI, ToBeRemoved, false);
+        break;
      }
    }
  }