1 /** 2 * Compiler implementation of the 3 * $(LINK2 http://www.dlang.org, D programming language). 4 * 5 * Copyright: Copyright (C) 1985-1998 by Symantec 6 * Copyright (C) 2000-2020 by The D Language Foundation, All Rights Reserved 7 * Authors: $(LINK2 http://www.digitalmars.com, Walter Bright) 8 * License: $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0) 9 * Source: $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/backend/gloop.d, backend/gloop.d) 10 * Coverage: https://codecov.io/gh/dlang/dmd/src/master/src/dmd/backend/gloop.d 11 */ 12 13 14 module dmd.backend.gloop; 15 16 version (SCPP) 17 version = COMPILE; 18 version (MARS) 19 version = COMPILE; 20 21 version (COMPILE) 22 { 23 24 import core.stdc.stdio; 25 import core.stdc.stdlib; 26 import core.stdc..string; 27 28 import dmd.backend.cc; 29 import dmd.backend.cdef; 30 import dmd.backend.code_x86; 31 import dmd.backend.evalu8 : el_toldoubled; 32 import dmd.backend.oper; 33 import dmd.backend.global; 34 import dmd.backend.goh; 35 import dmd.backend.el; 36 import dmd.backend.outbuf; 37 import dmd.backend.ty; 38 import dmd.backend.type; 39 40 import dmd.backend.dlist; 41 import dmd.backend.dvec; 42 import dmd.backend.mem; 43 44 nothrow: 45 46 char symbol_isintab(Symbol *s) { return sytab[s.Sclass] & SCSS; } 47 48 extern (C++): 49 50 bool findloopparameters(elem* erel, ref elem* rdeq, ref elem* rdinc); 51 52 /********************************* 53 * Loop data structure. 54 */ 55 56 struct loop 57 { 58 nothrow: 59 loop *Lnext; // Next loop in list (startloop -> start of list) 60 vec_t Lloop; // Vector of blocks in this loop 61 vec_t Lexit; // Vector of exit blocks of loop 62 block *Lhead; // Pointer to header of loop 63 block *Ltail; // Pointer to tail 64 block *Lpreheader; // Pointer to preheader (if any) 65 list_t Llis; // loop invariant elems moved to Lpreheader, so 66 // redundant temporaries aren't created 67 Iv *Livlist; // basic induction variables 68 Iv *Lopeqlist; // list of other op= variables 69 70 /*********************** 71 * Write loop. 72 */ 73 74 void print() 75 { 76 debug 77 { 78 loop *l = &this; 79 printf("loop %p, next = %p\n",l,(l) ? l.Lnext : cast(loop *) null); 80 if (!l) 81 return; 82 printf("\thead: B%d, tail: B%d, prehead: B%d\n",l.Lhead.Bdfoidx, 83 l.Ltail.Bdfoidx,(l.Lpreheader ) ? l.Lpreheader.Bdfoidx 84 : cast(uint)-1); 85 printf("\tLloop "); vec_println(l.Lloop); 86 printf("\tLexit "); vec_println(l.Lexit); 87 } 88 } 89 90 /*************************** 91 * Allocate loop. 92 */ 93 94 static loop *mycalloc() 95 { loop *l; 96 97 if (freelist) 98 { 99 l = freelist; 100 freelist = l.Lnext; 101 memset(l,0,loop.sizeof); 102 } 103 else 104 l = cast(loop *) mem_calloc(loop.sizeof); 105 return l; 106 } 107 108 __gshared loop *freelist; 109 } 110 111 struct famlist 112 { 113 nothrow: 114 elem **FLpelem; /* parent of elem in the family */ 115 elem *c1; 116 elem *c2; // c1*(basic IV) + c2 117 Symbol *FLtemp; // symbol index of temporary (FLELIM if */ 118 /* this entry has no temporary) */ 119 tym_t FLty; /* type of this induction variable */ 120 tym_t FLivty; /* type of the basic IV elem (which is */ 121 /* not necessarilly the type of the IV */ 122 /* elem!) */ 123 famlist *FLnext; // next in list 124 125 void print() 126 { 127 debug 128 { 129 printf("famlist:\n"); 130 printf("*FLpelem:\n"); 131 elem_print(*FLpelem); 132 printf("c1:"); 133 elem_print(c1); 134 printf("c2:"); 135 elem_print(c2); 136 printf("FLty = "); WRTYxx(FLty); 137 printf("\nFLivty = "); WRTYxx(FLivty); 138 printf("\n"); 139 } 140 } 141 142 /*************************** 143 * Allocate famlist. 144 */ 145 146 static famlist *mycalloc() 147 { famlist *fl; 148 149 if (freelist) 150 { 151 fl = freelist; 152 freelist = fl.FLnext; 153 memset(fl,0,famlist.sizeof); 154 } 155 else 156 fl = cast(famlist *) mem_calloc(famlist.sizeof); 157 return fl; 158 } 159 160 __gshared famlist *freelist; 161 } 162 163 enum FLELIM = cast(Symbol *)-1; 164 165 struct Iv 166 { 167 nothrow: 168 Symbol *IVbasic; // symbol of basic IV 169 elem **IVincr; // pointer to parent of IV increment elem 170 famlist *IVfamily; // variables in this family 171 Iv *IVnext; // next iv in list 172 173 void print() 174 { 175 debug 176 { 177 printf("IV: '%s'\n",IVbasic.Sident.ptr); 178 printf("*IVincr:\n"); 179 elem_print(*IVincr); 180 } 181 } 182 183 /*************************** 184 * Allocate Iv. 185 */ 186 187 static Iv *mycalloc() 188 { Iv *iv; 189 190 if (freelist) 191 { 192 iv = freelist; 193 freelist = iv.IVnext; 194 memset(iv,0,Iv.sizeof); 195 } 196 else 197 iv = cast(Iv *) mem_calloc(Iv.sizeof); 198 return iv; 199 } 200 201 __gshared Iv *freelist; 202 } 203 204 205 private __gshared bool addblk; /* if TRUE, then we added a block */ 206 207 /* is elem loop invariant? */ 208 int isLI(elem *n) { return n.Nflags & NFLli; } 209 210 /* make elem loop invariant */ 211 void makeLI(elem *n) { n.Nflags |= NFLli; } 212 213 /****************************** 214 * Only variables that could only be unambiguously defined 215 * are candidates for loop invariant removal and induction 216 * variables. 217 * This means only variables that have the SFLunambig flag 218 * set for them. 219 * Doing this will still cover 90% (I hope) of the cases, and 220 * is a lot faster to compute. 221 */ 222 223 /************* 224 * Free loops. 225 */ 226 227 private void freeloop(loop **pl) 228 { 229 loop *ln; 230 231 for (loop *l = *pl; l; l = ln) 232 { 233 ln = l.Lnext; 234 vec_free(l.Lloop); 235 vec_free(l.Lexit); 236 list_free(&l.Llis); 237 l.Lnext = loop.freelist; 238 loop.freelist = l; 239 } 240 *pl = null; 241 } 242 243 244 /********************************** 245 * Initialize block information. 246 * Returns: 247 * !=0 contains BCasm block 248 */ 249 250 int blockinit() 251 { 252 bool hasasm = false; 253 254 assert(dfo); 255 uint i = 0; 256 foreach (b; BlockRange(startblock)) 257 { 258 debug /* check integrity of Bpred and Bsucc */ 259 L1: 260 foreach (blp; ListRange(b.Bpred)) 261 { 262 foreach (bls; ListRange(list_block(blp).Bsucc)) 263 if (list_block(bls) == b) 264 continue L1; 265 assert(0); 266 } 267 268 ++i; 269 if (b.BC == BCasm) 270 hasasm = true; 271 /* compute number of blocks */ 272 } 273 assert(numblks == i && maxblks); 274 assert(i <= maxblks); 275 foreach (j, b; dfo[]) 276 { 277 assert(b.Bdfoidx == j); 278 b.Bdom = vec_realloc(b.Bdom, maxblks); /* alloc Bdom vectors */ 279 vec_clear(b.Bdom); 280 } 281 return hasasm; 282 } 283 284 /**************************************** 285 * Compute dominators (Bdom) for each block. 286 * See Aho & Ullman Fig. 13.5. 287 * Note that flow graph is reducible if there is only one 288 * pass through the loop. 289 * Input: 290 * dfo[] 291 * Output: 292 * fills in the Bdom vector for each block 293 */ 294 295 void compdom() 296 { 297 compdom(dfo[]); 298 } 299 300 private extern (D) void compdom(block*[] dfo) 301 { 302 assert(dfo.length); 303 block* sb = dfo[0]; // starting block 304 305 vec_clear(sb.Bdom); 306 vec_setbit(0,sb.Bdom); // starting block only doms itself 307 foreach (b; dfo) // for all except startblock 308 { 309 if (b != sb) 310 vec_set(b.Bdom); // dominate all blocks 311 } 312 313 vec_t t1 = vec_calloc(vec_numbits(sb.Bdom)); // allocate a temporary 314 uint cntr = 0; // # of times thru loop 315 bool chgs; 316 do 317 { 318 chgs = false; 319 foreach (i, b; dfo) // for each block in dfo[] 320 { 321 if (i == 0) 322 continue; // except startblock 323 if (b.Bpred) // if there are predecessors 324 { 325 vec_set(t1); 326 foreach (bl; ListRange(b.Bpred)) 327 { 328 vec_andass(t1,list_block(bl).Bdom); 329 } 330 } 331 else 332 vec_clear(t1); // no predecessors to dominate 333 vec_setbit(i,t1); // each block doms itself 334 if (chgs) 335 vec_copy(b.Bdom,t1); 336 else if (!vec_equal(b.Bdom,t1)) // if any changes 337 { 338 vec_copy(b.Bdom,t1); 339 chgs = true; 340 } 341 } 342 cntr++; 343 assert(cntr < 50); // should have converged by now 344 } while (chgs); 345 vec_free(t1); 346 347 debug if (debugc) 348 { 349 printf("Flow graph is%s reducible\n", cntr <= 2 ? "".ptr : " not".ptr); 350 } 351 } 352 353 /*************************** 354 * Return !=0 if block A dominates block B. 355 */ 356 357 bool dom(block *A,block *B) 358 { 359 assert(A && B && dfo && dfo[A.Bdfoidx] == A); 360 return vec_testbit(A.Bdfoidx,B.Bdom) != 0; 361 } 362 363 /********************** 364 * Find all the loops. 365 */ 366 367 private extern (D) void findloops(block*[] dfo, loop **ploops) 368 { 369 freeloop(ploops); 370 371 //printf("findloops()\n"); 372 foreach (b; dfo) 373 b.Bweight = 1; // reset Bweights 374 foreach_reverse (b; dfo) // for each block (note reverse 375 // dfo order, so most nested 376 // loops are found first) 377 { 378 assert(b); 379 foreach (bl; ListRange(b.Bsucc)) 380 { 381 block *s = list_block(bl); // each successor s to b 382 assert(s); 383 if (dom(s,b)) // if s dominates b 384 buildloop(ploops,s,b); // we found a loop 385 } 386 } 387 388 debug if (debugc) 389 { 390 for (loop *l = *ploops; l; l = l.Lnext) 391 l.print(); 392 } 393 } 394 395 /******************************** 396 */ 397 398 private uint loop_weight(uint weight, int factor) pure 399 { 400 // Be careful not to overflow 401 if (weight < 0x1_0000) 402 weight *= 10 * factor; 403 else if (weight < 0x10_0000) 404 weight *= 2 * factor; 405 else 406 weight += factor; 407 assert(cast(int)weight > 0); 408 return weight; 409 } 410 411 /***************************** 412 * Construct natural loop. 413 * Algorithm 13.1 from Aho & Ullman. 414 * Note that head dom tail. 415 */ 416 417 private void buildloop(loop **ploops,block *head,block *tail) 418 { 419 loop *l; 420 421 //printf("buildloop()\n"); 422 /* See if this is part of an existing loop. If so, merge the two. */ 423 for (l = *ploops; l; l = l.Lnext) 424 if (l.Lhead == head) /* two loops with same header */ 425 { 426 vec_t v; 427 428 // Calculate loop contents separately so we get the Bweights 429 // done accurately. 430 431 v = vec_calloc(maxblks); 432 vec_setbit(head.Bdfoidx,v); 433 head.Bweight = loop_weight(head.Bweight, 1); 434 insert(tail,v); 435 436 vec_orass(l.Lloop,v); // merge into existing loop 437 vec_free(v); 438 439 vec_clear(l.Lexit); // recompute exit blocks 440 goto L1; 441 } 442 443 /* Allocate loop entry */ 444 l = loop.mycalloc(); 445 l.Lnext = *ploops; 446 *ploops = l; // put l at beginning of list 447 448 l.Lloop = vec_calloc(maxblks); /* allocate loop bit vector */ 449 l.Lexit = vec_calloc(maxblks); /* bit vector for exit blocks */ 450 l.Lhead = head; 451 l.Ltail = tail; 452 453 vec_setbit(head.Bdfoidx,l.Lloop); /* add head to the loop */ 454 head.Bweight = loop_weight(head.Bweight, 2); // *20 usage for loop header 455 456 insert(tail,l.Lloop); /* insert tail in loop */ 457 458 L1: 459 /* Find all the exit blocks (those blocks with 460 * successors outside the loop). 461 */ 462 463 uint i; 464 for (i = 0; (i = cast(uint) vec_index(i, l.Lloop)) < dfo.length; ++i) // for each block in this loop 465 { 466 if (dfo[i].BC == BCret || dfo[i].BC == BCretexp || dfo[i].BC == BCexit) 467 vec_setbit(i,l.Lexit); /* ret blocks are exit blocks */ 468 else 469 { 470 foreach (bl; ListRange(dfo[i].Bsucc)) 471 if (!vec_testbit(list_block(bl).Bdfoidx,l.Lloop)) 472 { 473 vec_setbit(i,l.Lexit); 474 break; 475 } 476 } 477 } 478 479 /* Find preheader, if any, to the loop. 480 The preheader is a block that has only the head as a successor. 481 All other predecessors of head must be inside the loop. 482 */ 483 l.Lpreheader = null; 484 foreach (bl; ListRange(head.Bpred)) 485 { 486 block *b = list_block(bl); 487 488 if (!vec_testbit(b.Bdfoidx,l.Lloop)) /* if not in loop */ 489 { 490 if (l.Lpreheader) /* if already one */ 491 { 492 l.Lpreheader = null; /* can only be one */ 493 break; 494 } 495 else 496 { 497 if (list_next(b.Bsucc)) // if more than 1 successor 498 break; // b can't be a preheader 499 l.Lpreheader = b; 500 } 501 } 502 } 503 } 504 505 /******************************** 506 * Support routine for buildloop(). 507 * Add a block b and all its predecessors to loop lv. 508 */ 509 510 private void insert(block *b, vec_t lv) 511 { 512 assert(b && lv); 513 if (!vec_testbit(b.Bdfoidx,lv)) /* if block is not in loop */ 514 { 515 vec_setbit(b.Bdfoidx,lv); /* add block to loop */ 516 b.Bweight = loop_weight(b.Bweight,1); // *10 usage count 517 foreach (bl; ListRange(b.Bpred)) 518 insert(list_block(bl),lv); /* insert all its predecessors */ 519 } 520 } 521 522 /************************************** 523 * Perform loop rotations. 524 * Loop starts as: 525 * 526 * prehead 527 * | 528 * v 529 * +->head----> 530 * | | 531 * | v 532 * | body 533 * | | 534 * | v 535 * +--tail 536 * 537 * Two types are done: 538 * 1) Header is moved to be past the tail. 539 * 540 * prehead 541 * | 542 * +---+ 543 * | 544 * | body<-+ 545 * | | | 546 * | v | 547 * | tail | 548 * | | | 549 * | v | 550 * +->head--+ 551 * | 552 * v 553 * 554 * 2) Header is copied past the tail (done only if MFtime is set). 555 * 556 * prehead 557 * | 558 * v 559 * head1-----+ 560 * | | 561 * v | 562 * body<--+ | 563 * | | | 564 * v | | 565 * tail | | 566 * | | | 567 * v | | 568 * head2--+ | 569 * | | 570 * +--------+ 571 * v 572 * 573 * Input: 574 * Loop information (do not depend on the preheader information) 575 * Output: 576 * Revised list of blocks, a new dfo and new loop information 577 * Returns: 578 * true need to recompute loop data 579 */ 580 581 private int looprotate(loop *l) 582 { 583 block *tail = l.Ltail; 584 block *head = l.Lhead; 585 586 //printf("looprotate(%p)\n",l); 587 588 // Do not rotate loop if: 589 if (head == tail || // loop is only one block big 590 !vec_testbit(head.Bdfoidx,l.Lexit)) // header is not an exit block 591 goto Lret; 592 593 if (//iter != 1 && 594 vec_testbit(tail.Bdfoidx,l.Lexit)) // tail is an exit block 595 goto Lret; 596 597 // Do not rotate if already rotated 598 foreach (b; BlockRange(tail.Bnext)) 599 if (b == head) // if loop already rotated 600 goto Lret; 601 602 if (head.BC == BCtry) 603 goto Lret; 604 if (head.BC == BC_try) 605 goto Lret; 606 607 //if (debugc) { printf("looprotate: "); l.print(); } 608 609 if ((go.mfoptim & MFtime) && head.BC != BCswitch && head.BC != BCasm) 610 { // Duplicate the header past the tail (but doing 611 // switches would be too expensive in terms of code 612 // generated). 613 block *head2; 614 list_t *pbl2; 615 list_t *pbln; 616 617 head2 = block_calloc(); // create new head block 618 numblks++; // number of blocks in existence 619 head2.Btry = head.Btry; 620 head2.Bflags = head.Bflags; 621 head.Bflags = BFLnomerg; // move flags over to head2 622 head2.Bflags |= BFLnomerg; 623 head2.BC = head.BC; 624 assert(head2.BC != BCswitch); 625 if (head.Belem) // copy expression tree 626 head2.Belem = el_copytree(head.Belem); 627 head2.Bnext = tail.Bnext; 628 tail.Bnext = head2; 629 630 // pred(head1) = pred(head) outside loop 631 // pred(head2) = pred(head) inside loop 632 pbl2 = &(head2.Bpred); 633 for (list_t *pbl = &(head.Bpred); *pbl; pbl = pbln) 634 { 635 if (vec_testbit(list_block(*pbl).Bdfoidx, l.Lloop)) 636 { // if this predecessor is inside the loop 637 638 *pbl2 = *pbl; 639 *pbl = list_next(*pbl); 640 pbln = pbl; // don't skip this next one 641 (*pbl2).next = null; 642 auto bsucc = list_block(*pbl2).Bsucc; 643 pbl2 = &((*pbl2).next); 644 foreach (bl; ListRange(bsucc)) 645 if (list_block(bl) == head) 646 { 647 bl.ptr = cast(void *)head2; 648 goto L2; 649 } 650 assert(0); 651 L2: 652 } 653 else 654 pbln = &((*pbl).next); // next predecessor in list 655 } // for each pred(head) 656 657 // succ(head2) = succ(head) 658 foreach (bl; ListRange(head.Bsucc)) 659 { 660 list_append(&(head2.Bsucc),list_block(bl)); 661 list_append(&(list_block(bl).Bpred),head2); 662 } 663 if (debugc) printf("1Rotated loop %p\n", l); 664 go.changes++; 665 return true; 666 } 667 else if (startblock != head 668 /* This screws up the OPctor/OPdtor sequence for: 669 * struct CString 670 * { CString(); 671 * ~CString(); 672 * int GetLength(); 673 * }; 674 * 675 * void f(void) 676 * { for(;;) 677 * { CString s ; 678 * if(s.GetLength()!=0) 679 * break ; 680 * } 681 * } 682 */ 683 && !(config.flags3 & CFG3eh) 684 ) 685 { // optimize for space 686 // Simply position the header past the tail 687 foreach (b; BlockRange(startblock)) 688 { 689 if (b.Bnext == head) 690 { // found parent b of head 691 b.Bnext = head.Bnext; 692 head.Bnext = tail.Bnext; 693 tail.Bnext = head; 694 if (debugc) printf("2Rotated loop %p\n", l); 695 go.changes++; 696 return false; 697 } 698 } 699 assert(0); 700 } 701 Lret: 702 return false; 703 } 704 705 private __gshared 706 { 707 int gref; // parameter for markinvar() 708 block *gblock; // parameter for markinvar() 709 vec_t lv; // parameter for markinvar() 710 vec_t gin; // parameter for markinvar() 711 bool doflow; // true if flow analysis has to be redone 712 } 713 714 /********************************* 715 * Loop invariant and induction variable elimination. 716 * Input: 717 * iter which optimization iteration we are on 718 */ 719 720 void loopopt() 721 { 722 loop *ln; 723 vec_t rd; 724 loop *startloop; 725 726 if (debugc) printf("loopopt()\n"); 727 startloop = null; 728 restart: 729 file_progress(); 730 if (blockinit()) // init block data 731 { 732 findloops(dfo[], &startloop); // Compute Bweights 733 freeloop(&startloop); // free existing loops 734 return; // can't handle ASM blocks 735 } 736 compdom(); // compute dominators 737 findloops(dfo[], &startloop); // find the loops 738 739 for (loop *l = startloop; l; l = ln) 740 { 741 ln = l.Lnext; 742 if (looprotate(l)) // rotate the loop 743 { 744 compdfo(); 745 blockinit(); 746 compdom(); 747 findloops(dfo[], &startloop); // may trash l.Lnext 748 if (ln) 749 { ln = startloop; // start over 750 file_progress(); 751 } 752 } 753 } 754 // Make sure there is a preheader for each loop. 755 756 addblk = false; /* assume no blocks added */ 757 for (loop *l = startloop; l; l = l.Lnext) // for each loop 758 { 759 //if (debugc) l.print(); 760 761 if (!l.Lpreheader) /* if no preheader */ 762 { 763 block *h; 764 block *p; 765 766 if (debugc) printf("Generating preheader for loop\n"); 767 addblk = true; /* add one */ 768 p = block_calloc(); // the preheader 769 numblks++; 770 assert (numblks <= maxblks); 771 h = l.Lhead; /* loop header */ 772 773 /* Find parent of h */ 774 if (h == startblock) 775 startblock = p; 776 else 777 { 778 block *ph; 779 for (ph = startblock; 1; ph = ph.Bnext) 780 { 781 assert(ph); /* should have found it */ 782 if (ph.Bnext == h) 783 break; 784 } 785 /* Link p into block list between ph and h */ 786 ph.Bnext = p; 787 } 788 p.Bnext = h; 789 790 l.Lpreheader = p; 791 p.BC = BCgoto; 792 assert(p.Bsucc == null); 793 list_append(&(p.Bsucc),h); /* only successor is h */ 794 p.Btry = h.Btry; 795 796 if (debugc) printf("Adding preheader %p to loop %p\n",p,l); 797 798 // Move preds of h that aren't in the loop to preds of p 799 for (list_t bl = h.Bpred; bl;) 800 { 801 block *b = list_block(bl); 802 803 if (!vec_testbit (b.Bdfoidx, l.Lloop)) 804 { 805 list_append(&(p.Bpred), b); 806 list_subtract(&(h.Bpred), b); 807 bl = h.Bpred; /* dunno what subtract did */ 808 809 /* Fix up successors of predecessors */ 810 foreach (bls; ListRange(b.Bsucc)) 811 if (list_block(bls) == h) 812 bls.ptr = cast(void *)p; 813 } 814 else 815 bl = list_next(bl); 816 } 817 list_append(&(h.Bpred),p); /* p is a predecessor to h */ 818 } 819 } /* for */ 820 if (addblk) /* if any blocks were added */ 821 { 822 compdfo(); /* compute depth-first order */ 823 blockinit(); 824 compdom(); 825 findloops(dfo[], &startloop); // recompute block info 826 addblk = false; 827 } 828 829 /* Do the loop optimizations. Note that accessing the loops */ 830 /* starting from startloop will access them in least nested */ 831 /* one first, thus moving LIs out as far as possible. */ 832 833 doflow = true; /* do flow analysis */ 834 835 if (go.mfoptim & MFtime) 836 { 837 if (debugc) printf("Starting loop unrolling\n"); 838 for (loop *l = startloop; l; l = ln) 839 { 840 ln = l.Lnext; 841 if (loopunroll(l)) 842 { 843 compdfo(); // compute depth-first order 844 blockinit(); 845 compdom(); 846 findloops(dfo[], &startloop); // recompute block info 847 doflow = true; 848 if (ln) 849 { ln = startloop; // start over 850 file_progress(); 851 } 852 } 853 } 854 } 855 856 if (debugc) printf("Starting loop invariants\n"); 857 858 for (loop *l = startloop; l; l = l.Lnext) 859 { 860 uint i,j; 861 862 //if (debugc) l.print(); 863 864 file_progress(); 865 assert(l.Lpreheader); 866 if (doflow) 867 { 868 flowrd(); /* compute reaching definitions */ 869 flowlv(); /* compute live variables */ 870 flowae(); // compute available expressions 871 doflow = false; /* no need to redo it */ 872 if (go.defnod.length == 0) /* if no definition elems */ 873 break; /* no need to optimize */ 874 } 875 lv = l.Lloop; 876 if (debugc) printf("...Loop %p start...\n",l); 877 878 /* Unmark all elems in this loop */ 879 for (i = 0; (i = cast(uint) vec_index(i, lv)) < dfo.length; ++i) 880 if (dfo[i].Belem) 881 unmarkall(dfo[i].Belem); /* unmark all elems */ 882 883 /* Find & mark all LIs */ 884 gin = vec_clone(l.Lpreheader.Bout); 885 rd = vec_calloc(go.defnod.length); /* allocate our running RD vector */ 886 for (i = 0; (i = cast(uint) vec_index(i, lv)) < dfo.length; ++i) // for each block in loop 887 { 888 block *b = dfo[i]; 889 890 if (debugc) printf("B%d\n",i); 891 if (b.Belem) 892 { 893 vec_copy(rd, b.Binrd); // IN reaching defs 894 static if (0) 895 { 896 printf("i = %d\n",i); 897 { 898 for (int j = 0; j < go.defnod.length; j++) 899 elem_print(go.defnod[j].DNelem); 900 } 901 printf("rd : "); vec_println(rd); 902 } 903 gblock = b; 904 gref = 0; 905 if (b != l.Lhead) 906 gref = 1; 907 markinvar(b.Belem, rd); 908 static if (0) 909 { 910 printf("B%d\n", i); 911 { 912 foreach (j; 0 .. go.defnod.length) 913 { 914 printf(" [%2d] ", j); 915 WReqn(go.defnod[j].DNelem); 916 printf("\n"); 917 } 918 } 919 printf("rd : "); vec_println(rd); 920 printf("Boutrd: "); vec_println(b.Boutrd); 921 } 922 assert(vec_equal(rd, b.Boutrd)); 923 } 924 else 925 assert(vec_equal(b.Binrd, b.Boutrd)); 926 } 927 vec_free(rd); 928 vec_free(gin); 929 930 /* Move loop invariants */ 931 for (i = 0; (i = cast(uint) vec_index(i, lv)) < dfo.length; ++i) 932 { 933 int domexit; // true if this block dominates all 934 // exit blocks of the loop 935 936 for (j = 0; (j = cast(uint) vec_index(j, l.Lexit)) < dfo.length; ++j) // for each exit block 937 { 938 if (!vec_testbit (i, dfo[j].Bdom)) 939 { 940 domexit = 0; 941 goto L1; // break if !(i dom j) 942 } 943 } 944 // if i dom (all exit blocks) 945 domexit = 1; 946 L1: 947 if (dfo[i].Belem) 948 { // If there is any hope of making an improvement 949 if (domexit || l.Llis) 950 { 951 //if (dfo[i] != l.Lhead) 952 //domexit |= 2; 953 movelis(dfo[i].Belem, dfo[i], l, &domexit); 954 } 955 } 956 } 957 //list_free(&l.Llis,FPnull); 958 if (debugc) printf("...Loop %p done...\n",l); 959 960 if (go.mfoptim & MFliv) 961 { 962 loopiv(l); /* induction variables */ 963 if (addblk) /* if we added a block */ 964 { 965 compdfo(); 966 goto restart; /* play it safe and start over */ 967 } 968 } 969 } /* for */ 970 freeloop(&startloop); 971 } 972 973 /***************************** 974 * If elem is loop invariant, mark it. 975 * Input: 976 * lv = vector of all the blocks in this loop. 977 * rd = vector of loop invariants for this elem. This must be 978 * continually updated. 979 * Note that we do not iterate until no more LIs are found. The only 980 * thing this would buy us is stuff that depends on LI assignments. 981 */ 982 983 private void markinvar(elem *n,vec_t rd) 984 { 985 vec_t tmp; 986 uint i; 987 Symbol *v; 988 elem *n1; 989 990 assert(n && rd); 991 assert(vec_numbits(rd) == go.defnod.length); 992 switch (n.Eoper) 993 { 994 case OPaddass: case OPminass: case OPmulass: case OPandass: 995 case OPorass: case OPxorass: case OPdivass: case OPmodass: 996 case OPshlass: case OPshrass: case OPashrass: 997 case OPpostinc: case OPpostdec: 998 case OPcall: 999 case OPvecsto: 1000 case OPcmpxchg: 1001 markinvar(n.EV.E2,rd); 1002 goto case OPnegass; 1003 1004 case OPnegass: 1005 n1 = n.EV.E1; 1006 if (n1.Eoper == OPind) 1007 markinvar(n1.EV.E1,rd); 1008 else if (OTbinary(n1.Eoper)) 1009 { markinvar(n1.EV.E1,rd); 1010 markinvar(n1.EV.E2,rd); 1011 } 1012 L2: 1013 if (n.Eoper == OPcall || 1014 gblock.Btry || 1015 !(n1.Eoper == OPvar && 1016 symbol_isintab(n1.EV.Vsym))) 1017 { 1018 gref = 1; 1019 } 1020 1021 updaterd(n,rd,null); 1022 break; 1023 1024 case OPcallns: 1025 markinvar(n.EV.E2,rd); 1026 markinvar(n.EV.E1,rd); 1027 break; 1028 1029 case OPstrcpy: 1030 case OPstrcat: 1031 case OPmemcpy: 1032 case OPmemset: 1033 markinvar(n.EV.E2,rd); 1034 markinvar(n.EV.E1,rd); 1035 updaterd(n,rd,null); 1036 break; 1037 1038 case OPbtc: 1039 case OPbtr: 1040 case OPbts: 1041 markinvar(n.EV.E1,rd); 1042 markinvar(n.EV.E2,rd); 1043 updaterd(n,rd,null); 1044 break; 1045 1046 case OPucall: 1047 markinvar(n.EV.E1,rd); 1048 goto case OPasm; 1049 1050 case OPasm: 1051 gref = 1; 1052 updaterd(n,rd,null); 1053 break; 1054 1055 case OPucallns: 1056 case OPstrpar: 1057 case OPstrctor: 1058 case OPvector: 1059 case OPvoid: 1060 case OPstrlen: 1061 case OPddtor: 1062 case OPinp: 1063 case OPprefetch: // don't mark E2 1064 markinvar(n.EV.E1,rd); 1065 break; 1066 1067 case OPcond: 1068 case OPparam: 1069 case OPstrcmp: 1070 case OPmemcmp: 1071 case OPbt: // OPbt is like OPind, assume not LI 1072 case OPoutp: 1073 markinvar(n.EV.E1,rd); 1074 markinvar(n.EV.E2,rd); 1075 break; 1076 1077 case OPandand: 1078 case OPoror: 1079 markinvar(n.EV.E1,rd); 1080 tmp = vec_clone(rd); 1081 markinvar(n.EV.E2,tmp); 1082 if (el_returns(n.EV.E2)) 1083 vec_orass(rd,tmp); // rd |= tmp 1084 vec_free(tmp); 1085 break; 1086 1087 case OPcolon: 1088 case OPcolon2: 1089 tmp = vec_clone(rd); 1090 switch (el_returns(n.EV.E1) * 2 | int(el_returns(n.EV.E2))) 1091 { 1092 case 3: // E1 and E2 return 1093 markinvar(n.EV.E1,rd); 1094 markinvar(n.EV.E2,tmp); 1095 vec_orass(rd,tmp); // rd |= tmp 1096 break; 1097 case 2: // E1 returns 1098 markinvar(n.EV.E1,rd); 1099 markinvar(n.EV.E2,tmp); 1100 break; 1101 case 1: // E2 returns 1102 markinvar(n.EV.E1,tmp); 1103 markinvar(n.EV.E2,rd); 1104 break; 1105 case 0: // neither returns 1106 markinvar(n.EV.E1,tmp); 1107 vec_copy(tmp,rd); 1108 markinvar(n.EV.E2,tmp); 1109 break; 1110 default: 1111 assert(0); 1112 } 1113 vec_free(tmp); 1114 break; 1115 1116 case OPaddr: // mark addresses of OPvars as LI 1117 markinvar(n.EV.E1,rd); 1118 if (n.EV.E1.Eoper == OPvar || isLI(n.EV.E1)) 1119 makeLI(n); 1120 break; 1121 1122 case OPmsw: 1123 case OPneg: case OPbool: case OPnot: case OPcom: 1124 case OPs16_32: case OPd_s32: case OPs32_d: 1125 case OPd_s16: case OPs16_d: case OPd_f: case OPf_d: 1126 case OP32_16: case OPu8_16: 1127 case OPld_d: case OPd_ld: 1128 case OPld_u64: 1129 case OPc_r: case OPc_i: 1130 case OPu16_32: 1131 case OPu16_d: case OPd_u16: 1132 case OPs8_16: case OP16_8: 1133 case OPd_u32: case OPu32_d: 1134 1135 case OPs32_64: case OPu32_64: 1136 case OP64_32: 1137 case OPd_s64: case OPd_u64: 1138 case OPs64_d: 1139 case OPu64_d: 1140 case OP128_64: 1141 case OPs64_128: 1142 case OPu64_128: 1143 1144 case OPabs: 1145 case OPrndtol: 1146 case OPrint: 1147 case OPsetjmp: 1148 case OPbsf: 1149 case OPbsr: 1150 case OPbswap: 1151 case OPpopcnt: 1152 case OPsqrt: 1153 case OPsin: 1154 case OPcos: 1155 case OPvp_fp: /* BUG for MacHandles */ 1156 case OPnp_f16p: case OPf16p_np: case OPoffset: case OPnp_fp: 1157 case OPcvp_fp: 1158 case OPvecfill: 1159 markinvar(n.EV.E1,rd); 1160 if (isLI(n.EV.E1)) /* if child is LI */ 1161 makeLI(n); 1162 break; 1163 1164 case OPeq: 1165 case OPstreq: 1166 markinvar(n.EV.E2,rd); 1167 n1 = n.EV.E1; 1168 markinvar(n1,rd); 1169 1170 /* Determine if assignment is LI. Conditions are: */ 1171 /* 1) Rvalue is LI */ 1172 /* 2) Lvalue is a variable (simplifies things a lot) */ 1173 /* 3) Lvalue can only be affected by unambiguous defs */ 1174 /* 4) No rd's of lvalue that are within the loop (other */ 1175 /* than the current def) */ 1176 if (isLI(n.EV.E2) && n1.Eoper == OPvar) /* 1 & 2 */ 1177 { 1178 v = n1.EV.Vsym; 1179 if (v.Sflags & SFLunambig) 1180 { 1181 tmp = vec_calloc(go.defnod.length); 1182 //filterrd(tmp,rd,v); 1183 listrds(rd,n1,tmp); 1184 for (i = 0; (i = cast(uint) vec_index(i, tmp)) < go.defnod.length; ++i) 1185 if (go.defnod[i].DNelem != n && 1186 vec_testbit(go.defnod[i].DNblock.Bdfoidx,lv)) 1187 goto L3; 1188 makeLI(n); // then the def is LI 1189 L3: vec_free(tmp); 1190 } 1191 } 1192 goto L2; 1193 1194 case OPadd: case OPmin: case OPmul: case OPand: 1195 case OPor: case OPxor: case OPdiv: case OPmod: 1196 case OPshl: case OPshr: case OPeqeq: case OPne: 1197 case OPlt: case OPle: case OPgt: case OPge: 1198 case OPashr: 1199 case OPror: case OProl: 1200 case OPbtst: 1201 1202 case OPunord: case OPlg: case OPleg: case OPule: 1203 case OPul: case OPuge: case OPug: case OPue: 1204 case OPngt: case OPnge: case OPnlt: case OPnle: 1205 case OPord: case OPnlg: case OPnleg: case OPnule: 1206 case OPnul: case OPnuge: case OPnug: case OPnue: 1207 1208 case OPcomma: 1209 case OPpair: 1210 case OPrpair: 1211 case OPremquo: 1212 case OPscale: 1213 case OPyl2x: 1214 case OPyl2xp1: 1215 markinvar(n.EV.E1,rd); 1216 markinvar(n.EV.E2,rd); 1217 if (isLI(n.EV.E2) && isLI(n.EV.E1)) 1218 makeLI(n); 1219 break; 1220 1221 case OPind: /* must assume this is not LI */ 1222 markinvar(n.EV.E1,rd); 1223 if (isLI(n.EV.E1)) 1224 { 1225 static if (0) 1226 { 1227 // This doesn't work with C++, because exp2_ptrtocomtype() will 1228 // transfer const to where it doesn't belong. 1229 if (n.Ety & mTYconst) 1230 { 1231 makeLI(n); 1232 } 1233 1234 // This was disabled because it was marking as LI 1235 // the loop dimension for the [i] array if 1236 // a[j][i] was in a loop. This meant the a[j] array bounds 1237 // check for the a[j].length was skipped. 1238 else if (n.Ejty) 1239 { 1240 tmp = vec_calloc(go.defnod.length); 1241 filterrdind(tmp,rd,n); // only the RDs pertaining to n 1242 1243 // if (no RDs within loop) 1244 // then it's loop invariant 1245 1246 for (i = 0; (i = cast(uint) vec_index(i, tmp)) < go.defnod.length; ++i) // for each RD 1247 if (vec_testbit(go.defnod[i].DNblock.Bdfoidx,lv)) 1248 goto L10; // found a RD in the loop 1249 1250 // If gref has occurred, this can still be LI 1251 // if n is an AE that was also an AE at the 1252 // point of gref. 1253 // We can catch a subset of these cases by looking 1254 // at the AEs at the start of the loop. 1255 if (gref) 1256 { 1257 int j; 1258 1259 //printf("\tn is: "); WReqn(n); printf("\n"); 1260 for (j = 0; (j = cast(uint) vec_index(j, gin)) < go.exptop; ++j) 1261 { 1262 elem *e = go.expnod[j]; 1263 1264 //printf("\t\tgo.expnod[%d] = %p\n",j,e); 1265 //printf("\t\tAE is: "); WReqn(e); printf("\n"); 1266 if (el_match2(n,e)) 1267 { 1268 makeLI(n); 1269 //printf("Ind LI: "); WReqn(n); printf("\n"); 1270 break; 1271 } 1272 } 1273 } 1274 else 1275 makeLI(n); 1276 L10: 1277 vec_free(tmp); 1278 break; 1279 } 1280 } 1281 } 1282 break; 1283 1284 case OPvar: 1285 v = n.EV.Vsym; 1286 if (v.Sflags & SFLunambig) // must be unambiguous to be LI 1287 { 1288 tmp = vec_calloc(go.defnod.length); 1289 //filterrd(tmp,rd,v); // only the RDs pertaining to v 1290 listrds(rd,n,tmp); // only the RDs pertaining to v 1291 1292 // if (no RDs within loop) 1293 // then it's loop invariant 1294 1295 for (i = 0; (i = cast(uint) vec_index(i, tmp)) < go.defnod.length; ++i) // for each RD 1296 if (vec_testbit(go.defnod[i].DNblock.Bdfoidx,lv)) 1297 goto L1; // found a RD in the loop 1298 makeLI(n); 1299 1300 L1: vec_free(tmp); 1301 } 1302 break; 1303 1304 case OPstring: 1305 case OPrelconst: 1306 case OPconst: /* constants are always LI */ 1307 case OPframeptr: 1308 makeLI(n); 1309 break; 1310 1311 case OPinfo: 1312 markinvar(n.EV.E2,rd); 1313 break; 1314 1315 case OPstrthis: 1316 case OPmark: 1317 case OPctor: 1318 case OPdtor: 1319 case OPdctor: 1320 case OPhalt: 1321 case OPgot: // shouldn't OPgot be makeLI ? 1322 break; 1323 1324 default: 1325 WROP(n.Eoper); 1326 //printf("n.Eoper = %d, OPconst = %d\n", n.Eoper, OPconst); 1327 assert(0); 1328 } 1329 1330 debug 1331 { 1332 if (debugc && isLI(n)) 1333 { 1334 printf(" LI elem: "); 1335 WReqn(n); 1336 printf("\n"); 1337 } 1338 } 1339 } 1340 1341 /************************************** 1342 * Fill in the DefNode.DNumambig vector. 1343 * Set bits defnod[] indices for entries 1344 * which are completely destroyed when e is 1345 * unambiguously assigned to. 1346 * Params: 1347 * v = vector to fill in 1348 * e = defnod[] entry that is an assignment to a variable 1349 */ 1350 1351 extern (C) 1352 { 1353 void fillInDNunambig(vec_t v, elem *e) 1354 { 1355 assert(OTassign(e.Eoper)); 1356 elem *t = e.EV.E1; 1357 assert(t.Eoper == OPvar); 1358 Symbol *d = t.EV.Vsym; 1359 1360 targ_size_t toff = t.EV.Voffset; 1361 targ_size_t tsize = (e.Eoper == OPstreq) ? type_size(e.ET) : tysize(t.Ety); 1362 targ_size_t ttop = toff + tsize; 1363 1364 //printf("updaterd: "); WReqn(n); printf(" toff=%d, tsize=%d\n", toff, tsize); 1365 1366 1367 // for all unambig defs in go.defnod[] 1368 foreach (const i; 0 .. go.defnod.length) 1369 { 1370 elem *tn = go.defnod[i].DNelem; 1371 elem *tn1; 1372 targ_size_t tn1size; 1373 1374 if (!OTassign(tn.Eoper)) 1375 continue; 1376 1377 // If def of same variable, kill that def 1378 tn1 = tn.EV.E1; 1379 if (tn1.Eoper != OPvar || d != tn1.EV.Vsym) 1380 continue; 1381 1382 // If t completely overlaps tn1 1383 tn1size = (tn.Eoper == OPstreq) 1384 ? type_size(tn.ET) : tysize(tn1.Ety); 1385 if (toff <= tn1.EV.Voffset && 1386 tn1.EV.Voffset + tn1size <= ttop) 1387 { 1388 vec_setbit(cast(uint)i, v); 1389 } 1390 } 1391 } 1392 } 1393 1394 /******************** 1395 * Update rd vector. 1396 * Input: 1397 * n assignment elem or function call elem or OPasm elem 1398 * rd reaching def vector to update 1399 * (clear bits for defs we kill, set bit for n (which is the 1400 * def we are genning)) 1401 * vecdim go.defnod.length 1402 */ 1403 1404 extern (C) { 1405 void updaterd(elem *n,vec_t GEN,vec_t KILL) 1406 { 1407 const op = n.Eoper; 1408 elem *t; 1409 1410 assert(OTdef(op)); 1411 assert(GEN); 1412 elem_debug(n); 1413 1414 uint ni = n.Edef; 1415 assert(ni != -1); 1416 1417 // If unambiguous def 1418 if (OTassign(op) && (t = n.EV.E1).Eoper == OPvar) 1419 { 1420 vec_t v = go.defnod[ni].DNunambig; 1421 assert(v); 1422 if (KILL) 1423 vec_orass(KILL, v); 1424 vec_subass(GEN, v); 1425 } 1426 else 1427 { 1428 static if (0) 1429 { 1430 if (OTassign(op) && t.Eoper != OPvar && t.Ejty) 1431 { 1432 // for all unambig defs in go.defnod[] 1433 foreach (uint i; 0 .. go.defnod.length) 1434 { 1435 elem *tn = go.defnod[i].DNelem; 1436 elem *tn1; 1437 1438 if (tn == n) 1439 ni = i; 1440 1441 if (!OTassign(tn.Eoper)) 1442 continue; 1443 1444 // If def of same variable, kill that def 1445 tn1 = tn.EV.E1; 1446 if (tn1.Eoper != OPind || t.Ejty != tn1.Ejty) 1447 continue; 1448 1449 if (KILL) 1450 vec_setbit(i,KILL); 1451 vec_clearbit(i,GEN); 1452 } 1453 } 1454 } 1455 } 1456 1457 vec_setbit(ni,GEN); // set bit in GEN for this def 1458 } 1459 } 1460 1461 /*************************** 1462 * Mark all elems as not being loop invariant. 1463 */ 1464 1465 private void unmarkall(elem *e) 1466 { 1467 for (; 1; e = e.EV.E1) 1468 { 1469 assert(e); 1470 e.Nflags &= ~NFLli; /* unmark this elem */ 1471 if (OTunary(e.Eoper)) 1472 continue; 1473 else if (OTbinary(e.Eoper)) 1474 { 1475 unmarkall(e.EV.E2); 1476 continue; 1477 } 1478 return; 1479 } 1480 } 1481 1482 1483 /******************************** 1484 * Search for references to v in tree n before nstop is encountered. 1485 * Params: 1486 * v = symbol to search for 1487 * n = tree to search 1488 * nstop = stop searching tree when reaching this elem 1489 * Returns: 1490 * true if there are any refs of v in n before nstop is encountered 1491 */ 1492 1493 private bool refs(Symbol *v,elem *n,elem *nstop) 1494 { 1495 symbol_debug(v); 1496 assert(symbol_isintab(v)); 1497 assert(v.Ssymnum < globsym.top); 1498 bool stop = false; 1499 1500 // Walk tree in evaluation order 1501 bool walk(elem* n) 1502 { 1503 elem_debug(n); 1504 assert(n); 1505 1506 if (stop) 1507 return false; 1508 bool f = false; 1509 const op = n.Eoper; 1510 if (OTunary(op)) 1511 f = walk(n.EV.E1); 1512 else if (OTbinary(op)) 1513 { 1514 if (ERTOL(n)) /* watch order of evaluation */ 1515 { 1516 /* Note that (OPvar = e) is not a ref of OPvar, whereas */ 1517 /* ((OPbit OPvar) = e) is a ref of OPvar, and (OPvar op= e) is */ 1518 /* a ref of OPvar, etc. */ 1519 f = walk(n.EV.E2); 1520 if (!f) 1521 { 1522 if (op == OPeq) 1523 { 1524 if (n.EV.E1.Eoper != OPvar) 1525 f = walk(n.EV.E1.EV.E1); 1526 } 1527 else 1528 f = walk(n.EV.E1); 1529 } 1530 } 1531 else 1532 f = walk(n.EV.E1) || walk(n.EV.E2); 1533 } 1534 1535 if (n == nstop) 1536 stop = true; 1537 else if (n.Eoper == OPvar) /* if variable reference */ 1538 return v == n.EV.Vsym; 1539 else if (op == OPasm) /* everything is referenced */ 1540 return true; 1541 return f; 1542 } 1543 1544 return walk(n); 1545 } 1546 1547 /************************* 1548 * Move LIs to preheader. 1549 * Conditions to be satisfied for code motion are: 1550 * 1) All exit blocks are dominated (true before this is called). 1551 * -- OR -- 1552 * 2) Variable assigned by a statement is not live on entering 1553 * any successor outside the loop of any exit block of the 1554 * loop. 1555 * 1556 * 3) Cannot move assignment to variable if there are any other 1557 * assignments to that variable within the loop (true or 1558 * assignment would not have been marked LI). 1559 * 4) Cannot move assignments to a variable if there is a use 1560 * of that variable in this loop that is reached by any other 1561 * def of it. 1562 * 5) Cannot move expressions that have side effects. 1563 * 6) Do not move assignments to variables that could be affected 1564 * by ambiguous defs. 1565 * 7) It is not worth it to move expressions of the form: 1566 * (var == const) 1567 * Input: 1568 * n the elem we're considering moving 1569 * b the block this elem is in 1570 * l the loop we're in 1571 * domexit flags 1572 * bit 0: 1 this branch is always executed 1573 * 0 this branch is only sometimes executed 1574 * bit 1: 1 do not move LIs that could throw exceptions 1575 * or cannot be moved past possibly thrown exceptions 1576 * Returns: 1577 * revised domexit 1578 */ 1579 1580 private void movelis(elem *n,block *b,loop *l,int *pdomexit) 1581 { 1582 vec_t tmp; 1583 elem *ne; 1584 elem *t; 1585 elem *n2; 1586 Symbol *v; 1587 tym_t ty; 1588 1589 Lnextlis: 1590 //if (isLI(n)) { printf("movelis("); WReqn(n); printf(")\n"); } 1591 assert(l && n); 1592 elem_debug(n); 1593 const op = n.Eoper; 1594 switch (op) 1595 { 1596 case OPvar: 1597 case OPconst: 1598 case OPrelconst: 1599 goto Lret; 1600 1601 case OPandand: 1602 case OPoror: 1603 case OPcond: 1604 { 1605 int domexit; 1606 1607 movelis(n.EV.E1,b,l,pdomexit); // always executed 1608 domexit = *pdomexit & ~1; // sometimes executed 1609 movelis(n.EV.E2,b,l,&domexit); 1610 *pdomexit |= domexit & 2; 1611 goto Lret; 1612 } 1613 1614 case OPeq: 1615 // Do loop invariant assignments 1616 if (isLI(n) && n.EV.E1.Eoper == OPvar) 1617 { 1618 v = n.EV.E1.EV.Vsym; // variable index number 1619 1620 if (!(v.Sflags & SFLunambig)) goto L3; // case 6 1621 1622 // If case 4 is not satisfied, return 1623 1624 // Function parameters have an implied definition prior to the 1625 // first block of the function. Unfortunately, the rd vector 1626 // does not take this into account. Therefore, we assume the 1627 // worst and reject assignments to function parameters. 1628 if (v.Sclass == SCparameter || v.Sclass == SCregpar || 1629 v.Sclass == SCfastpar || v.Sclass == SCshadowreg) 1630 goto L3; 1631 1632 if (el_sideeffect(n.EV.E2)) goto L3; // case 5 1633 1634 // If case 1 or case 2 is not satisfied, return 1635 1636 if (!(*pdomexit & 1)) // if not case 1 1637 { 1638 uint i; 1639 for (i = 0; (i = cast(uint) vec_index(i, l.Lexit)) < dfo.length; ++i) // for each exit block 1640 { 1641 foreach (bl; ListRange(dfo[i].Bsucc)) 1642 { 1643 block *s; // successor to exit block 1644 1645 s = list_block(bl); 1646 if (!vec_testbit(s.Bdfoidx,l.Lloop) && 1647 (!symbol_isintab(v) || 1648 vec_testbit(v.Ssymnum,s.Binlv))) // if v is live on exit 1649 goto L3; 1650 } 1651 } 1652 } 1653 1654 tmp = vec_calloc(go.defnod.length); 1655 uint i; 1656 for (i = 0; (i = cast(uint) vec_index(i, l.Lloop)) < dfo.length; ++i) // for each block in loop 1657 { 1658 if (dfo[i] == b) // except this one 1659 continue; 1660 1661 //<if there are any RDs of v in Binrd other than n> 1662 // <if there are any refs of v in that block> 1663 // return; 1664 1665 //filterrd(tmp,dfo[i].Binrd,v); 1666 listrds(dfo[i].Binrd,n.EV.E1,tmp); 1667 uint j; 1668 for (j = 0; (j = cast(uint) vec_index(j, tmp)) < go.defnod.length; ++j) // for each RD of v in Binrd 1669 { 1670 if (go.defnod[j].DNelem == n) 1671 continue; 1672 if (dfo[i].Belem && 1673 refs(v,dfo[i].Belem,cast(elem *)null)) //if refs of v 1674 { 1675 vec_free(tmp); 1676 goto L3; 1677 } 1678 break; 1679 } 1680 } // foreach 1681 1682 // <if there are any RDs of v in b.Binrd other than n> 1683 // <if there are any references to v before the 1684 // assignment to v> 1685 // <can't move this assignment> 1686 1687 //filterrd(tmp,b.Binrd,v); 1688 listrds(b.Binrd,n.EV.E1,tmp); 1689 uint j; 1690 for (j = 0; (j = cast(uint) vec_index(j, tmp)) < go.defnod.length; ++j) // for each RD of v in Binrd 1691 { 1692 if (go.defnod[j].DNelem == n) 1693 continue; 1694 if (b.Belem && refs(v,b.Belem,n)) 1695 { 1696 vec_free(tmp); 1697 goto L3; // can't move it 1698 } 1699 break; // avoid redundant looping 1700 } 1701 vec_free(tmp); 1702 1703 // We have an LI assignment, n. 1704 // Check to see if the rvalue is already in the preheader. 1705 foreach (nl; ListRange(l.Llis)) 1706 { 1707 if (el_match(n.EV.E2,list_elem(nl).EV.E2)) 1708 { 1709 el_free(n.EV.E2); 1710 n.EV.E2 = el_calloc(); 1711 el_copy(n.EV.E2,list_elem(nl).EV.E1); 1712 if (debugc) printf("LI assignment rvalue was replaced\n"); 1713 doflow = true; 1714 go.changes++; 1715 break; 1716 } 1717 } 1718 1719 // move assignment elem to preheader 1720 if (debugc) printf("Moved LI assignment "); 1721 1722 debug 1723 if (debugc) 1724 { 1725 WReqn(n); 1726 printf(";\n"); 1727 } 1728 1729 go.changes++; 1730 doflow = true; // redo flow analysis 1731 ne = el_calloc(); 1732 el_copy(ne,n); // create assignment elem 1733 assert(l.Lpreheader); // make sure there is one 1734 appendelem(ne,&(l.Lpreheader.Belem)); // append ne to preheader 1735 list_prepend(&l.Llis,ne); 1736 1737 el_copy(n,ne.EV.E1); // replace n with just a reference to v 1738 goto Lret; 1739 } // if 1740 break; 1741 1742 case OPcall: 1743 case OPucall: 1744 *pdomexit |= 2; 1745 break; 1746 1747 case OPpair: 1748 case OPrpair: // don't move these, as they do not do computation 1749 movelis(n.EV.E1,b,l,pdomexit); 1750 n = n.EV.E2; 1751 goto Lnextlis; 1752 1753 default: 1754 break; 1755 } 1756 1757 L3: 1758 // Do leaves of non-LI expressions, leaves of = elems that didn't 1759 // meet the invariant assignment removal criteria, and don't do leaves 1760 if (OTleaf(op)) 1761 goto Lret; 1762 if (!isLI(n) || op == OPeq || op == OPcomma || OTrel(op) || op == OPnot || 1763 // These are usually addressing modes, so moving them is a net loss 1764 (I32 && op == OPshl && n.EV.E2.Eoper == OPconst && el_tolong(n.EV.E2) <= 3UL) 1765 ) 1766 { 1767 if (OTassign(op)) 1768 { 1769 elem *n1 = n.EV.E1; 1770 elem *n11; 1771 1772 if (OTbinary(op)) 1773 movelis(n.EV.E2,b,l,pdomexit); 1774 1775 // Do lvalue only if it is an expression 1776 if (n1.Eoper == OPvar) 1777 goto Lret; 1778 n11 = n1.EV.E1; 1779 if (OTbinary(n1.Eoper)) 1780 { 1781 movelis(n11,b,l,pdomexit); 1782 n = n1.EV.E2; 1783 } 1784 // If *(x + c), just make x the LI, not the (x + c). 1785 // The +c comes free with the addressing mode. 1786 else if (n1.Eoper == OPind && 1787 isLI(n11) && 1788 n11.Eoper == OPadd && 1789 n11.EV.E2.Eoper == OPconst 1790 ) 1791 { 1792 n = n11.EV.E1; 1793 } 1794 else 1795 n = n11; 1796 movelis(n,b,l,pdomexit); 1797 if (b.Btry || !(n1.Eoper == OPvar && symbol_isintab(n1.EV.Vsym))) 1798 { 1799 //printf("assign to global => domexit |= 2\n"); 1800 *pdomexit |= 2; 1801 } 1802 } 1803 else if (OTunary(op)) 1804 { 1805 elem *e1 = n.EV.E1; 1806 1807 // If *(x + c), just make x the LI, not the (x + c). 1808 // The +c comes free with the addressing mode. 1809 if (op == OPind && 1810 isLI(e1) && 1811 e1.Eoper == OPadd && 1812 e1.EV.E2.Eoper == OPconst 1813 ) 1814 { 1815 n = e1.EV.E1; 1816 } 1817 else 1818 n = e1; 1819 } 1820 else if (OTbinary(op)) 1821 { 1822 movelis(n.EV.E1,b,l,pdomexit); 1823 n = n.EV.E2; 1824 } 1825 goto Lnextlis; 1826 } 1827 1828 if (el_sideeffect(n)) 1829 goto Lret; 1830 1831 static if (0) 1832 { 1833 printf("*pdomexit = %d\n",*pdomexit); 1834 if (*pdomexit & 2) 1835 { 1836 // If any indirections, can't LI it 1837 1838 // If this operand has already been indirected, we can let 1839 // it pass. 1840 Symbol *s; 1841 1842 printf("looking at:\n"); 1843 elem_print(n); 1844 s = el_basesym(n.EV.E1); 1845 if (s) 1846 { 1847 foreach (nl; ListRange(l.Llis)) 1848 { 1849 elem *el = list_elem(nl); 1850 el = el.EV.E2; 1851 elem_print(el); 1852 if (el.Eoper == OPind && el_basesym(el.EV.E1) == s) 1853 { 1854 printf(" pass!\n"); 1855 goto Lpass; 1856 } 1857 } 1858 } 1859 printf(" skip!\n"); 1860 goto Lret; 1861 1862 Lpass: 1863 } 1864 } 1865 1866 // Move the LI expression to the preheader 1867 if (debugc) printf("Moved LI expression "); 1868 1869 debug 1870 if (debugc) 1871 { 1872 WReqn(n); 1873 printf(";\n"); 1874 } 1875 1876 // See if it's already been moved 1877 ty = n.Ety; 1878 foreach (nl; ListRange(l.Llis)) 1879 { 1880 elem *el; 1881 tym_t ty2; 1882 1883 el = list_elem(nl); 1884 //printf("existing LI: "); WReqn(el); printf("\n"); 1885 ty2 = el.EV.E2.Ety; 1886 if (tysize(ty) == tysize(ty2)) 1887 { 1888 el.EV.E2.Ety = ty; 1889 if (el_match(n,el.EV.E2)) 1890 { 1891 el.EV.E2.Ety = ty2; 1892 if (!OTleaf(n.Eoper)) 1893 { el_free(n.EV.E1); 1894 if (OTbinary(n.Eoper)) 1895 el_free(n.EV.E2); 1896 } 1897 el_copy(n,el.EV.E1); // make copy of temp 1898 n.Ety = ty; 1899 1900 debug 1901 if (debugc) 1902 { printf("Already moved: LI expression replaced with "); 1903 WReqn(n); 1904 printf("\nPreheader %d expression %p ", 1905 l.Lpreheader.Bdfoidx,l.Lpreheader.Belem); 1906 WReqn(l.Lpreheader.Belem); 1907 printf("\n"); 1908 } 1909 1910 go.changes++; 1911 doflow = true; // redo flow analysis 1912 goto Lret; 1913 } 1914 el.EV.E2.Ety = ty2; 1915 } 1916 } 1917 1918 if (!(*pdomexit & 1)) // if only sometimes executed 1919 { 1920 if (debugc) printf(" doesn't dominate exit\n"); 1921 goto Lret; // don't move LI 1922 } 1923 1924 if (tyaggregate(n.Ety)) 1925 goto Lret; 1926 1927 go.changes++; 1928 doflow = true; // redo flow analysis 1929 1930 t = el_alloctmp(n.Ety); /* allocate temporary t */ 1931 1932 debug 1933 { 1934 if (debugc) printf("movelis() introduced new variable '%s' of type ",t.EV.Vsym.Sident.ptr); 1935 if (debugc) WRTYxx(t.Ety); 1936 if (debugc) printf("\n"); 1937 } 1938 1939 n2 = el_calloc(); 1940 el_copy(n2,n); /* create copy n2 of n */ 1941 ne = el_bin(OPeq,t.Ety,t,n2); /* create elem t=n2 */ 1942 assert(l.Lpreheader); /* make sure there is one */ 1943 appendelem(ne,&(l.Lpreheader.Belem)); /* append ne to preheader */ 1944 1945 debug 1946 if (debugc) 1947 { 1948 printf("Preheader %d expression %p\n\t", 1949 l.Lpreheader.Bdfoidx,l.Lpreheader.Belem); 1950 WReqn(l.Lpreheader.Belem); 1951 printf("\nLI expression replaced with "); WReqn(t); 1952 printf("\n"); 1953 } 1954 1955 el_copy(n,t); /* replace this elem with t */ 1956 1957 // Remember LI expression in elem list 1958 list_prepend(&l.Llis,ne); 1959 1960 Lret: 1961 1962 } 1963 1964 /*************************** 1965 * Append elem to existing elem using an OPcomma elem. 1966 * Input: 1967 * n elem to append 1968 * *pn elem to append to 1969 */ 1970 1971 private void appendelem(elem *n,elem **pn) 1972 { 1973 assert(n && pn); 1974 if (*pn) /* if this elem exists */ 1975 { 1976 while ((*pn).Eoper == OPcomma) /* while we see OPcomma elems */ 1977 { 1978 (*pn).Ety = n.Ety; 1979 pn = &((*pn).EV.E2); /* cruise down right side */ 1980 } 1981 *pn = el_bin(OPcomma,n.Ety,*pn,n); 1982 } 1983 else 1984 *pn = n; /* else create a elem */ 1985 } 1986 1987 /************** LOOP INDUCTION VARIABLES **********************/ 1988 1989 /********************* 1990 * Free iv list. 1991 */ 1992 1993 private void freeivlist(Iv *biv) 1994 { 1995 Iv *bivnext; 1996 1997 while (biv) 1998 { 1999 famlist *fln; 2000 2001 for (famlist *fl = biv.IVfamily; fl; fl = fln) 2002 { 2003 el_free(fl.c1); 2004 el_free(fl.c2); 2005 fln = fl.FLnext; 2006 2007 fl.FLnext = famlist.freelist; 2008 famlist.freelist = fl; 2009 } 2010 bivnext = biv.IVnext; 2011 2012 biv.IVnext = Iv.freelist; 2013 Iv.freelist = biv; 2014 2015 biv = bivnext; 2016 } 2017 } 2018 2019 /**************************** 2020 * Create a new famlist entry. 2021 */ 2022 2023 private famlist * newfamlist(tym_t ty) 2024 { 2025 famlist *fl; 2026 eve c = void; 2027 2028 memset(&c,0,c.sizeof); 2029 fl = famlist.mycalloc(); 2030 fl.FLty = ty; 2031 switch (tybasic(ty)) 2032 { case TYfloat: 2033 c.Vfloat = 1; 2034 break; 2035 2036 case TYdouble: 2037 case TYdouble_alias: 2038 c.Vdouble = 1; 2039 break; 2040 2041 case TYldouble: 2042 c.Vldouble = 1; 2043 break; 2044 2045 case TYbool: 2046 case TYchar: 2047 case TYschar: 2048 case TYuchar: 2049 c.Vchar = 1; 2050 break; 2051 2052 case TYshort: 2053 case TYushort: 2054 case TYchar16: 2055 case TYwchar_t: // BUG: what about 4 byte wchar_t's? 2056 c.Vshort = 1; 2057 break; 2058 2059 case TYsptr: 2060 case TYcptr: 2061 case TYfptr: 2062 case TYvptr: 2063 case TYnptr: 2064 case TYnullptr: 2065 case TYimmutPtr: 2066 case TYsharePtr: 2067 case TYrestrictPtr: 2068 case TYfgPtr: 2069 ty = TYint; 2070 if (I64) 2071 ty = TYllong; 2072 goto case TYuint; 2073 2074 case TYint: 2075 case TYuint: 2076 c.Vint = 1; 2077 break; 2078 2079 case TYhptr: 2080 ty = TYlong; 2081 goto case TYlong; 2082 2083 case TYlong: 2084 case TYulong: 2085 case TYdchar: 2086 default: 2087 c.Vlong = 1; 2088 break; 2089 } 2090 fl.c1 = el_const(ty,&c); /* c1 = 1 */ 2091 c.Vldouble = 0; 2092 if (typtr(ty)) 2093 { 2094 ty = TYint; 2095 if (tybasic(ty) == TYhptr) 2096 ty = TYlong; 2097 if (I64) 2098 ty = TYllong; 2099 } 2100 fl.c2 = el_const(ty,&c); /* c2 = 0 */ 2101 return fl; 2102 } 2103 2104 /*************************** 2105 * Remove induction variables from loop l. 2106 * Loop invariant removal should have been done just previously. 2107 */ 2108 2109 private void loopiv(loop *l) 2110 { 2111 if (debugc) printf("loopiv(%p)\n",l); 2112 assert(l.Livlist == null && l.Lopeqlist == null); 2113 elimspec(l); 2114 if (doflow) 2115 { 2116 flowrd(); /* compute reaching defs */ 2117 flowlv(); /* compute live variables */ 2118 flowae(); // compute available expressions 2119 doflow = false; 2120 } 2121 findbasivs(l); /* find basic induction variables */ 2122 findopeqs(l); // find op= variables 2123 findivfams(l); /* find IV families */ 2124 elimfrivivs(l); /* eliminate less useful family IVs */ 2125 intronvars(l); /* introduce new variables */ 2126 elimbasivs(l); /* eliminate basic IVs */ 2127 if (!addblk) // adding a block changes the Binlv 2128 elimopeqs(l); // eliminate op= variables 2129 2130 freeivlist(l.Livlist); // free up IV list 2131 l.Livlist = null; 2132 freeivlist(l.Lopeqlist); // free up list 2133 l.Lopeqlist = null; 2134 2135 /* Do copy propagation and dead assignment elimination */ 2136 /* upon return to optfunc() */ 2137 } 2138 2139 /************************************* 2140 * Find basic IVs of loop l. 2141 * A basic IV x of loop l is a variable x which has 2142 * exactly one assignment within l of the form: 2143 * x += c or x -= c, where c is either a constant 2144 * or a LI. 2145 * Input: 2146 * go.defnod[] loaded with all the definition elems of the loop 2147 */ 2148 2149 private void findbasivs(loop *l) 2150 { 2151 vec_t poss,notposs; 2152 elem *n; 2153 bool ambdone; 2154 2155 assert(l); 2156 ambdone = false; 2157 poss = vec_calloc(globsym.top); 2158 notposs = vec_calloc(globsym.top); /* vector of all variables */ 2159 /* (initially all unmarked) */ 2160 2161 /* for each def in go.defnod[] that is within loop l */ 2162 2163 foreach (const i; 0 .. go.defnod.length) 2164 { 2165 if (!vec_testbit(go.defnod[i].DNblock.Bdfoidx,l.Lloop)) 2166 continue; /* def is not in the loop */ 2167 2168 n = go.defnod[i].DNelem; 2169 elem_debug(n); 2170 if (OTassign(n.Eoper) && n.EV.E1.Eoper == OPvar) 2171 { 2172 Symbol *s; /* if unambiguous def */ 2173 2174 s = n.EV.E1.EV.Vsym; 2175 if (symbol_isintab(s)) 2176 { 2177 SYMIDX v; 2178 2179 v = n.EV.E1.EV.Vsym.Ssymnum; 2180 if ((n.Eoper == OPaddass || n.Eoper == OPminass || 2181 n.Eoper == OPpostinc || n.Eoper == OPpostdec) && 2182 (n.EV.E2.Eoper == OPconst || /* if x += c or x -= c */ 2183 n.EV.E2.Eoper == OPvar && isLI(n.EV.E2))) 2184 { 2185 if (vec_testbit(v,poss)) 2186 /* We've already seen this def elem, */ 2187 /* therefore there is more than one */ 2188 /* def of v within the loop, therefore */ 2189 /* v is not a basic IV. */ 2190 vec_setbit(v,notposs); 2191 else 2192 vec_setbit(v,poss); 2193 } 2194 else /* else mark as not possible */ 2195 vec_setbit(v,notposs); 2196 } 2197 } 2198 else /* else ambiguous def */ 2199 { 2200 /* mark any vars that could be affected by */ 2201 /* this def as not possible */ 2202 2203 if (!ambdone) /* avoid redundant loops */ 2204 { 2205 foreach (uint j; 0 .. globsym.top) 2206 { 2207 if (!(globsym.tab[j].Sflags & SFLunambig)) 2208 vec_setbit(j,notposs); 2209 } 2210 ambdone = true; 2211 } 2212 } 2213 } 2214 static if (0) 2215 { 2216 printf("poss "); vec_println(poss); 2217 printf("notposs "); vec_println(notposs); 2218 } 2219 vec_subass(poss,notposs); /* poss = poss - notposs */ 2220 2221 /* create list of IVs */ 2222 uint i; 2223 for (i = 0; (i = cast(uint) vec_index(i, poss)) < globsym.top; ++i) // for each basic IV 2224 { 2225 Iv *biv; 2226 Symbol *s; 2227 2228 /* Skip if we don't want it to be a basic IV (see funcprev()) */ 2229 s = globsym.tab[i]; 2230 assert(symbol_isintab(s)); 2231 if (s.Sflags & SFLnotbasiciv) 2232 continue; 2233 2234 // Do not use aggregates as basic IVs. This is because the other loop 2235 // code doesn't check offsets into symbols, (assuming everything 2236 // is at offset 0). We could perhaps amend this by allowing basic IVs 2237 // if the struct consists of only one data member. 2238 if (tyaggregate(s.ty())) 2239 continue; 2240 2241 // Do not use complex types as basic IVs, as the code gen isn't up to it 2242 if (tycomplex(s.ty())) 2243 continue; 2244 2245 biv = Iv.mycalloc(); 2246 biv.IVnext = l.Livlist; 2247 l.Livlist = biv; // link into list of IVs 2248 2249 biv.IVbasic = s; // symbol of basic IV 2250 2251 if (debugc) printf("Symbol '%s' (%d) is a basic IV, ", s.Sident.ptr, i); 2252 2253 /* We have the sym idx of the basic IV. We need to find */ 2254 /* the parent of the increment elem for it. */ 2255 2256 /* First find the go.defnod[] */ 2257 foreach (j; 0 .. go.defnod.length) 2258 { 2259 /* If go.defnod is a def of i and it is in the loop */ 2260 if (go.defnod[j].DNelem.EV.E1 && /* OPasm are def nodes */ 2261 go.defnod[j].DNelem.EV.E1.EV.Vsym == s && 2262 vec_testbit(go.defnod[j].DNblock.Bdfoidx,l.Lloop)) 2263 { 2264 biv.IVincr = el_parent(go.defnod[j].DNelem,&(go.defnod[j].DNblock.Belem)); 2265 assert(s == (*biv.IVincr).EV.E1.EV.Vsym); 2266 2267 debug if (debugc) 2268 { 2269 printf("Increment elem is: "); WReqn(*biv.IVincr); printf("\n"); 2270 } 2271 goto L1; 2272 } 2273 } 2274 assert(0); /* should have found it */ 2275 /* NOTREACHED */ 2276 2277 L1: 2278 } 2279 2280 vec_free(poss); 2281 vec_free(notposs); 2282 } 2283 2284 /************************************* 2285 * Find op= elems of loop l. 2286 * Analogous to findbasivs(). 2287 * Used to eliminate useless loop code normally found in benchmark programs. 2288 * Input: 2289 * go.defnod[] loaded with all the definition elems of the loop 2290 */ 2291 2292 private void findopeqs(loop *l) 2293 { 2294 vec_t poss,notposs; 2295 elem *n; 2296 bool ambdone; 2297 2298 assert(l); 2299 ambdone = false; 2300 poss = vec_calloc(globsym.top); 2301 notposs = vec_calloc(globsym.top); // vector of all variables 2302 // (initially all unmarked) 2303 2304 // for each def in go.defnod[] that is within loop l 2305 2306 foreach (i; 0 .. go.defnod.length) 2307 { 2308 if (!vec_testbit(go.defnod[i].DNblock.Bdfoidx,l.Lloop)) 2309 continue; // def is not in the loop 2310 2311 n = go.defnod[i].DNelem; 2312 elem_debug(n); 2313 if (OTopeq(n.Eoper) && n.EV.E1.Eoper == OPvar) 2314 { 2315 Symbol *s; // if unambiguous def 2316 2317 s = n.EV.E1.EV.Vsym; 2318 if (symbol_isintab(s)) 2319 { 2320 SYMIDX v; 2321 2322 v = n.EV.E1.EV.Vsym.Ssymnum; 2323 { 2324 if (vec_testbit(v,poss)) 2325 // We've already seen this def elem, 2326 // therefore there is more than one 2327 // def of v within the loop, therefore 2328 // v is not a basic IV. 2329 vec_setbit(v,notposs); 2330 else 2331 vec_setbit(v,poss); 2332 } 2333 } 2334 } 2335 else // else ambiguous def 2336 { 2337 // mark any vars that could be affected by 2338 // this def as not possible 2339 2340 if (!ambdone) // avoid redundant loops 2341 { 2342 foreach (j; 0 .. globsym.top) 2343 { 2344 if (!(globsym.tab[j].Sflags & SFLunambig)) 2345 vec_setbit(j,notposs); 2346 } 2347 ambdone = true; 2348 } 2349 } 2350 } 2351 2352 // Don't use symbols already in Livlist 2353 for (Iv *iv = l.Livlist; iv; iv = iv.IVnext) 2354 { 2355 Symbol *s; 2356 2357 s = iv.IVbasic; 2358 vec_setbit(s.Ssymnum,notposs); 2359 } 2360 2361 2362 static if (0) 2363 { 2364 printf("poss "); vec_println(poss); 2365 printf("notposs "); vec_println(notposs); 2366 } 2367 2368 vec_subass(poss,notposs); // poss = poss - notposs 2369 2370 // create list of IVs 2371 uint i; 2372 for (i = 0; (i = cast(uint) vec_index(i, poss)) < globsym.top; ++i) // for each opeq IV 2373 { 2374 Iv *biv; 2375 Symbol *s; 2376 2377 s = globsym.tab[i]; 2378 assert(symbol_isintab(s)); 2379 2380 // Do not use aggregates as basic IVs. This is because the other loop 2381 // code doesn't check offsets into symbols, (assuming everything 2382 // is at offset 0). We could perhaps amend this by allowing basic IVs 2383 // if the struct consists of only one data member. 2384 if (tyaggregate(s.ty())) 2385 continue; 2386 2387 biv = Iv.mycalloc(); 2388 biv.IVnext = l.Lopeqlist; 2389 l.Lopeqlist = biv; // link into list of IVs 2390 2391 biv.IVbasic = s; // symbol of basic IV 2392 2393 if (debugc) printf("Symbol '%s' (%d) is an opeq IV, ",s.Sident.ptr,i); 2394 2395 // We have the sym idx of the basic IV. We need to find 2396 // the parent of the increment elem for it. 2397 2398 // First find the go.defnod[] 2399 foreach (j; 0 .. go.defnod.length) 2400 { 2401 // If go.defnod is a def of i and it is in the loop 2402 if (go.defnod[j].DNelem.EV.E1 && // OPasm are def nodes 2403 go.defnod[j].DNelem.EV.E1.EV.Vsym == s && 2404 vec_testbit(go.defnod[j].DNblock.Bdfoidx,l.Lloop)) 2405 { 2406 biv.IVincr = el_parent(go.defnod[j].DNelem,&(go.defnod[j].DNblock.Belem)); 2407 assert(s == (*biv.IVincr).EV.E1.EV.Vsym); 2408 2409 debug if (debugc) 2410 { 2411 printf("Opeq elem is: "); WReqn(*biv.IVincr); printf("\n"); 2412 } 2413 goto Lcont; 2414 } 2415 } 2416 assert(0); // should have found it 2417 // NOTREACHED 2418 2419 Lcont: 2420 } 2421 2422 vec_free(poss); 2423 vec_free(notposs); 2424 } 2425 2426 /***************************** 2427 * Find families for each basic IV. 2428 * An IV family is a list of elems of the form 2429 * c1*X+c2, where X is a basic induction variable. 2430 * Note that we do not do divides, because of roundoff error problems. 2431 */ 2432 2433 private void findivfams(loop *l) 2434 { 2435 if (debugc) printf("findivfams(%p)\n",l); 2436 for (Iv *biv = l.Livlist; biv; biv = biv.IVnext) 2437 { 2438 for (uint i = 0; (i = cast(uint) vec_index(i, l.Lloop)) < dfo.length; ++i) // for each block in loop 2439 if (dfo[i].Belem) 2440 ivfamelems(biv,&(dfo[i].Belem)); 2441 /* Fold all the constant expressions in c1 and c2. */ 2442 for (famlist *fl = biv.IVfamily; fl; fl = fl.FLnext) 2443 { 2444 fl.c1 = doptelem(fl.c1,GOALvalue | GOALagain); 2445 fl.c2 = doptelem(fl.c2,GOALvalue | GOALagain); 2446 } 2447 } 2448 } 2449 2450 /************************* 2451 * Tree walking support routine for findivfams(). 2452 * biv = basic induction variable pointer 2453 * pn pointer to elem 2454 */ 2455 2456 private void ivfamelems(Iv *biv,elem **pn) 2457 { 2458 tym_t ty,c2ty; 2459 elem *n; 2460 elem *n1; 2461 elem *n2; 2462 2463 assert(pn); 2464 n = *pn; 2465 assert(biv && n); 2466 const op = n.Eoper; 2467 if (OTunary(op)) 2468 { 2469 ivfamelems(biv,&n.EV.E1); 2470 n1 = n.EV.E1; 2471 n2 = null; 2472 } 2473 else if (OTbinary(op)) 2474 { 2475 ivfamelems(biv,&n.EV.E1); 2476 ivfamelems(biv,&n.EV.E2); /* LTOR or RTOL order is unimportant */ 2477 n1 = n.EV.E1; 2478 n2 = n.EV.E2; 2479 } 2480 else /* else leaf elem */ 2481 return; /* which can't be in the family */ 2482 2483 if (tycomplex(n.Ety)) 2484 return; 2485 2486 if (op == OPmul || op == OPadd || op == OPmin || 2487 op == OPneg || op == OPshl) 2488 { /* Note that we are wimping out and not considering */ 2489 /* LI variables as part of c1 and c2, but only constants. */ 2490 2491 ty = n.Ety; 2492 2493 /* Since trees are canonicalized, basic induction variables */ 2494 /* will only appear on the left. */ 2495 2496 /* Improvement: */ 2497 /* We wish to pick up the cases (biv + li), (biv - li) and */ 2498 /* (li + biv). OPmul and LS with bivs are out, since if we */ 2499 /* try to eliminate the biv, and the loop test is a >, >=, */ 2500 /* <, <=, we have a problem since we don't know if the li */ 2501 /* is negative. (Would have to call swaprel() on it.) */ 2502 2503 /* If we have (li + var), swap the leaves. */ 2504 if (op == OPadd && isLI(n1) && n1.Eoper == OPvar && n2.Eoper == OPvar) 2505 { 2506 n.EV.E1 = n2; 2507 n2 = n.EV.E2 = n1; 2508 n1 = n.EV.E1; 2509 } 2510 2511 // Get rid of case where we painted a far pointer to a long 2512 if (op == OPadd || op == OPmin) 2513 { 2514 int sz; 2515 2516 sz = tysize(ty); 2517 if (sz == tysize(TYfptr) && !tyfv(ty) && 2518 (sz != tysize(n1.Ety) || sz != tysize(n2.Ety))) 2519 return; 2520 } 2521 2522 /* Look for function of basic IV (-biv or biv op const) */ 2523 if (n1.Eoper == OPvar && n1.EV.Vsym == biv.IVbasic) 2524 { 2525 if (op == OPneg) 2526 { 2527 famlist *fl; 2528 2529 if (debugc) printf("found (-biv), elem %p\n",n); 2530 fl = newfamlist(ty); 2531 fl.FLivty = n1.Ety; 2532 fl.FLpelem = pn; 2533 fl.FLnext = biv.IVfamily; 2534 biv.IVfamily = fl; 2535 fl.c1 = el_una(op,ty,fl.c1); /* c1 = -1 */ 2536 } 2537 else if (n2.Eoper == OPconst || 2538 isLI(n2) && (op == OPadd || op == OPmin)) 2539 { 2540 famlist *fl; 2541 2542 debug 2543 if (debugc) 2544 { printf("found (biv op const), elem ("); 2545 WReqn(n); 2546 printf(");\n"); 2547 printf("Types: n1="); WRTYxx(n1.Ety); 2548 printf(" ty="); WRTYxx(ty); 2549 printf(" n2="); WRTYxx(n2.Ety); 2550 printf("\n"); 2551 } 2552 2553 fl = newfamlist(ty); 2554 fl.FLivty = n1.Ety; 2555 fl.FLpelem = pn; 2556 fl.FLnext = biv.IVfamily; 2557 biv.IVfamily = fl; 2558 switch (op) 2559 { 2560 case OPadd: /* c2 = right */ 2561 c2ty = n2.Ety; 2562 if (typtr(fl.c2.Ety)) 2563 c2ty = fl.c2.Ety; 2564 goto L1; 2565 2566 case OPmin: /* c2 = -right */ 2567 c2ty = fl.c2.Ety; 2568 /* Check for subtracting two pointers */ 2569 if (typtr(c2ty) && typtr(n2.Ety)) 2570 { 2571 if (tybasic(c2ty) == TYhptr) 2572 c2ty = TYlong; 2573 else 2574 c2ty = I64 ? TYllong : TYint; 2575 } 2576 L1: 2577 fl.c2 = el_bin(op,c2ty,fl.c2,el_copytree(n2)); 2578 break; 2579 2580 case OPmul: /* c1 = right */ 2581 case OPshl: /* c1 = 1 << right */ 2582 fl.c1 = el_bin(op,ty,fl.c1,el_copytree(n2)); 2583 break; 2584 2585 default: 2586 assert(0); 2587 } 2588 } 2589 } 2590 2591 /* Look for function of existing IV */ 2592 2593 for (famlist *f = biv.IVfamily; f; f = f.FLnext) 2594 { 2595 if (*f.FLpelem != n1) /* not it */ 2596 continue; 2597 2598 /* Look for (f op constant) */ 2599 if (op == OPneg) 2600 { 2601 if (debugc) printf("found (-f), elem %p\n",n); 2602 /* c1 = -c1; c2 = -c2; */ 2603 f.c1 = el_una(OPneg,ty,f.c1); 2604 f.c2 = el_una(OPneg,ty,f.c2); 2605 f.FLty = ty; 2606 f.FLpelem = pn; /* replace with new IV */ 2607 } 2608 else if (n2.Eoper == OPconst || 2609 isLI(n2) && (op == OPadd || op == OPmin)) 2610 { 2611 debug 2612 if (debugc) 2613 { 2614 printf("found (f op const), elem ("); 2615 WReqn(n); 2616 assert(*pn == n); 2617 printf(");\n"); 2618 elem_print(n); 2619 } 2620 2621 switch (op) 2622 { 2623 case OPmul: 2624 case OPshl: 2625 f.c1 = el_bin(op,ty,f.c1,el_copytree(n2)); 2626 break; 2627 2628 case OPadd: 2629 case OPmin: 2630 break; 2631 2632 default: 2633 assert(0); 2634 } 2635 f.c2 = el_bin(op,ty,f.c2,el_copytree(n2)); 2636 f.FLty = ty; 2637 f.FLpelem = pn; /* replace with new IV */ 2638 } /* else if */ 2639 } /* for */ 2640 } /* if */ 2641 } 2642 2643 /********************************* 2644 * Eliminate frivolous family ivs, that is, 2645 * if we can't eliminate the BIV, then eliminate family ivs that 2646 * differ from it only by a constant. 2647 */ 2648 2649 private void elimfrivivs(loop *l) 2650 { 2651 for (Iv *biv = l.Livlist; biv; biv = biv.IVnext) 2652 { 2653 int nfams; 2654 int nrefs; 2655 2656 if (debugc) printf("elimfrivivs()\n"); 2657 /* Compute number of family ivs for biv */ 2658 nfams = 0; 2659 for (famlist *fl = biv.IVfamily; fl; fl = fl.FLnext) 2660 nfams++; 2661 if (debugc) printf("nfams = %d\n",nfams); 2662 2663 /* Compute number of references to biv */ 2664 if (onlyref(biv.IVbasic,l,*biv.IVincr,&nrefs)) 2665 nrefs--; 2666 if (debugc) printf("nrefs = %d\n",nrefs); 2667 assert(nrefs + 1 >= nfams); 2668 if (nrefs > nfams || // if we won't eliminate the biv 2669 (!I16 && nrefs == nfams)) 2670 { /* Eliminate any family ivs that only differ by a constant */ 2671 /* from biv */ 2672 for (famlist *fl = biv.IVfamily; fl; fl = fl.FLnext) 2673 { 2674 elem *ec1 = fl.c1; 2675 targ_llong c; 2676 2677 if (elemisone(ec1) || 2678 // Eliminate fl's that can be represented by 2679 // an addressing mode 2680 (!I16 && ec1.Eoper == OPconst && tyintegral(ec1.Ety) && 2681 ((c = el_tolong(ec1)) == 2 || c == 4 || c == 8) 2682 ) 2683 ) 2684 { 2685 fl.FLtemp = FLELIM; 2686 2687 debug 2688 if (debugc) 2689 { 2690 printf("Eliminating frivolous IV "); 2691 WReqn(*fl.FLpelem); 2692 printf("\n"); 2693 } 2694 } 2695 } 2696 } 2697 } 2698 } 2699 2700 2701 /****************************** 2702 * Introduce new variables. 2703 */ 2704 2705 private void intronvars(loop *l) 2706 { 2707 elem *T; 2708 elem *ne; 2709 elem *t2; 2710 elem *C2; 2711 elem *cmul; 2712 tym_t ty,tyr; 2713 2714 if (debugc) printf("intronvars(%p)\n",l); 2715 for (Iv *biv = l.Livlist; biv; biv = biv.IVnext) // for each basic IV 2716 { 2717 elem *bivinc = *biv.IVincr; /* ptr to increment elem */ 2718 2719 for (famlist *fl = biv.IVfamily; fl; fl = fl.FLnext) 2720 { /* for each IV in family of biv */ 2721 if (fl.FLtemp == FLELIM) /* if already eliminated */ 2722 continue; 2723 2724 /* If induction variable can be written as a simple function */ 2725 /* of a previous induction variable, skip it. */ 2726 if (funcprev(biv,fl)) 2727 continue; 2728 2729 ty = fl.FLty; 2730 T = el_alloctmp(ty); /* allocate temporary T */ 2731 fl.FLtemp = T.EV.Vsym; 2732 2733 debug 2734 { 2735 if (debugc) printf("intronvars() introduced new variable '%s' of type ",T.EV.Vsym.Sident.ptr); 2736 if (debugc) WRTYxx(ty); 2737 if (debugc) printf("\n"); 2738 } 2739 2740 /* append elem T=biv*C1+C2 to preheader */ 2741 /* ne = biv*C1 */ 2742 tyr = fl.FLivty; /* type of biv */ 2743 ne = el_var(biv.IVbasic); 2744 ne.Ety = tyr; 2745 if (!elemisone(fl.c1)) /* don't multiply ptrs by 1 */ 2746 ne = el_bin(OPmul,tyr,ne,el_copytree(fl.c1)); 2747 if (tyfv(tyr) && tysize(ty) == SHORTSIZE) 2748 ne = el_una(OP32_16,ty,ne); 2749 C2 = el_copytree(fl.c2); 2750 t2 = el_bin(OPadd,ty,ne,C2); /* t2 = ne + C2 */ 2751 ne = el_bin(OPeq,ty,el_copytree(T),t2); 2752 appendelem(ne, &(l.Lpreheader.Belem)); 2753 2754 /* prefix T+=C1*C to elem biv+=C */ 2755 /* Must prefix in case the value of the expression (biv+=C) */ 2756 /* is used by somebody up the tree. */ 2757 cmul = el_bin(OPmul,fl.c1.Ety,el_copytree(fl.c1), 2758 el_copytree(bivinc.EV.E2)); 2759 t2 = el_bin(bivinc.Eoper,ty,el_copytree(T),cmul); 2760 t2 = doptelem(t2,GOALvalue | GOALagain); 2761 *biv.IVincr = el_bin(OPcomma,bivinc.Ety,t2,bivinc); 2762 biv.IVincr = &((*biv.IVincr).EV.E2); 2763 2764 debug 2765 if (debugc) 2766 { 2767 printf("Replacing elem ("); 2768 WReqn(*fl.FLpelem); 2769 printf(") with '%s'\n",T.EV.Vsym.Sident.ptr); 2770 printf("The init elem is ("); 2771 WReqn(ne); 2772 printf(");\nThe increment elem is ("); 2773 WReqn(t2); 2774 printf(")\n"); 2775 } 2776 2777 el_free(*fl.FLpelem); 2778 *fl.FLpelem = T; /* replace elem n with ref to T */ 2779 doflow = true; /* redo flow analysis */ 2780 go.changes++; 2781 } /* for */ 2782 } /* for */ 2783 } 2784 2785 /******************************* 2786 * Determine if induction variable can be rewritten as a simple 2787 * function of a previously generated temporary. 2788 * This can frequently 2789 * generate less code than that of an all-new temporary (especially 2790 * if it is the same as a previous temporary!). 2791 * Input: 2792 * biv Basic induction variable 2793 * fl Item in biv's family list we are looking at 2794 * Returns: 2795 * false Caller should create a new induction variable. 2796 * true *FLpelem is replaced with function of a previous 2797 * induction variable. FLtemp is set to FLELIM to 2798 * indicate this. 2799 */ 2800 2801 private bool funcprev(Iv *biv,famlist *fl) 2802 { 2803 tym_t tymin; 2804 int sz; 2805 elem *e1; 2806 elem *e2; 2807 elem *flse1; 2808 2809 debug if (debugc) 2810 printf("funcprev\n"); 2811 2812 for (famlist *fls = biv.IVfamily; fls != fl; fls = fls.FLnext) 2813 { 2814 assert(fls); /* fl must be in list */ 2815 if (fls.FLtemp == FLELIM) /* no iv we can use here */ 2816 continue; 2817 2818 /* The multipliers must match */ 2819 if (!el_match(fls.c1,fl.c1)) 2820 continue; 2821 2822 /* If the c2's match also, we got it easy */ 2823 if (el_match(fls.c2,fl.c2)) 2824 { 2825 if (tysize(fl.FLty) > tysize(fls.FLtemp.ty())) 2826 continue; /* can't increase size of var */ 2827 flse1 = el_var(fls.FLtemp); 2828 flse1.Ety = fl.FLty; 2829 goto L2; 2830 } 2831 2832 /* The difference is only in the addition. Therefore, replace 2833 *fl.FLpelem with: 2834 case 1: (fl.c2 + (fls.FLtemp - fls.c2)) 2835 case 2: (fls.FLtemp + (fl.c2 - fls.c2)) 2836 */ 2837 e1 = fl.c2; 2838 /* Subtracting relocatables usually generates slow code for */ 2839 /* linkers that can't handle arithmetic on relocatables. */ 2840 if (typtr(fls.c2.Ety)) 2841 { 2842 if (fls.c2.Eoper == OPrelconst && 2843 !(fl.c2.Eoper == OPrelconst && 2844 fl.c2.EV.Vsym == fls.c2.EV.Vsym) 2845 ) 2846 continue; 2847 } 2848 flse1 = el_var(fls.FLtemp); 2849 e2 = flse1; /* assume case 1 */ 2850 tymin = e2.Ety; 2851 if (typtr(fls.c2.Ety)) 2852 { 2853 if (!typtr(tymin)) 2854 { 2855 if (typtr(e1.Ety)) 2856 { 2857 e1 = e2; 2858 e2 = fl.c2; /* case 2 */ 2859 } 2860 else /* can't subtract fptr */ 2861 goto L1; 2862 } 2863 if (tybasic(fls.c2.Ety) == TYhptr) 2864 tymin = TYlong; 2865 else 2866 tymin = I64 ? TYllong : TYint; /* type of (ptr - ptr) */ 2867 } 2868 2869 /* If e1 and fls.c2 are fptrs, and are not from the same */ 2870 /* segment, we cannot subtract them. */ 2871 if (tyfv(e1.Ety) && tyfv(fls.c2.Ety)) 2872 { 2873 if (e1.Eoper != OPrelconst || fls.c2.Eoper != OPrelconst) 2874 goto L1; /* assume expressions have diff segs */ 2875 if (e1.EV.Vsym.Sclass != fls.c2.EV.Vsym.Sclass) 2876 { 2877 L1: 2878 el_free(flse1); 2879 continue; 2880 } 2881 } 2882 2883 /* Some more type checking... */ 2884 sz = tysize(fl.FLty); 2885 if (sz != tysize(e1.Ety) && 2886 sz != tysize(tymin)) 2887 goto L1; 2888 2889 /* Do some type checking (can't add pointers and get a pointer!) */ 2890 //if (typtr(fl.FLty) && typtr(e1.Ety) && typtr(tymin)) 2891 //goto L1; 2892 /* Construct (e1 + (e2 - fls.c2)) */ 2893 flse1 = el_bin(OPadd,fl.FLty, 2894 e1, 2895 el_bin(OPmin,tymin, 2896 e2, 2897 el_copytree(fls.c2))); 2898 if (sz < tysize(tymin) && sz == tysize(e1.Ety)) 2899 { 2900 assert(I16); 2901 flse1.EV.E2 = el_una(OPoffset,fl.FLty,flse1.EV.E2); 2902 } 2903 2904 flse1 = doptelem(flse1,GOALvalue | GOALagain); 2905 fl.c2 = null; 2906 L2: 2907 debug if (debugc) 2908 { 2909 printf("Replacing "); 2910 WReqn(*fl.FLpelem); 2911 printf(" with "); 2912 WReqn(flse1); 2913 printf("\n"); 2914 } 2915 2916 el_free(*fl.FLpelem); 2917 *fl.FLpelem = flse1; 2918 2919 /* Fix the iv so when we do loops again, we won't create */ 2920 /* yet another iv, which is just what funcprev() is supposed */ 2921 /* to prevent. */ 2922 fls.FLtemp.Sflags |= SFLnotbasiciv; 2923 2924 fl.FLtemp = FLELIM; /* mark iv as being gone */ 2925 go.changes++; 2926 doflow = true; 2927 return true; /* it was replaced */ 2928 } 2929 return false; /* need to create a new variable */ 2930 } 2931 2932 /*********************** 2933 * Eliminate basic IVs. 2934 */ 2935 2936 private void elimbasivs(loop *l) 2937 { 2938 if (debugc) printf("elimbasivs(%p)\n",l); 2939 for (Iv *biv = l.Livlist; biv; biv = biv.IVnext) // for each basic IV 2940 { 2941 /* Can't eliminate this basic IV if we have a goal for the */ 2942 /* increment elem. */ 2943 // Be careful about Nflags being in a union... 2944 elem* einc = *biv.IVincr; 2945 if (!(einc.Nflags & NFLnogoal)) 2946 continue; 2947 2948 Symbol* X = biv.IVbasic; 2949 assert(symbol_isintab(X)); 2950 tym_t ty = X.ty(); 2951 int refcount; 2952 elem** pref = onlyref(X,l,einc,&refcount); 2953 2954 /* if only ref of X is of the form (X) or (X relop e) or (e relop X) */ 2955 if (pref != null && refcount <= 1) 2956 { 2957 famlist* fl = biv.IVfamily; 2958 if (!fl) // if no elems in family of biv 2959 continue; 2960 2961 elem* ref_ = *pref; 2962 2963 /* Replace (X) with (X != 0) */ 2964 if (ref_.Eoper == OPvar) 2965 ref_ = *pref = el_bin(OPne,TYint,ref_,el_long(ref_.Ety,0L)); 2966 2967 fl = simfl(fl,ty); /* find simplest elem in family */ 2968 if (!fl) 2969 continue; 2970 2971 // Don't do the replacement if we would replace a 2972 // signed comparison with an unsigned one 2973 tym_t flty = fl.FLty; 2974 if (tyuns(ref_.EV.E1.Ety) | tyuns(ref_.EV.E2.Ety)) 2975 flty = touns(flty); 2976 2977 if (ref_.Eoper >= OPle && ref_.Eoper <= OPge && 2978 !(tyuns(ref_.EV.E1.Ety) | tyuns(ref_.EV.E2.Ety)) && 2979 tyuns(flty)) 2980 continue; 2981 2982 /* if we have (e relop X), replace it with (X relop e) */ 2983 if (ref_.EV.E2.Eoper == OPvar && ref_.EV.E2.EV.Vsym == X) 2984 { 2985 elem* tmp = ref_.EV.E2; 2986 ref_.EV.E2 = ref_.EV.E1; 2987 ref_.EV.E1 = tmp; 2988 ref_.Eoper = cast(ubyte)swaprel(ref_.Eoper); 2989 } 2990 2991 // If e*c1+c2 would result in a sign change or an overflow 2992 // then we can't do it 2993 if (fl.c1.Eoper == OPconst) 2994 { 2995 targ_llong c1 = el_tolong(fl.c1); 2996 const int sz = tysize(ty); 2997 if (sz == SHORTSIZE && 2998 ((ref_.EV.E2.Eoper == OPconst && 2999 c1 * el_tolong(ref_.EV.E2) & ~0x7FFFL) || 3000 c1 & ~0x7FFFL) 3001 ) 3002 continue; 3003 3004 if (sz == LONGSIZE && 3005 ((ref_.EV.E2.Eoper == OPconst && 3006 c1 * el_tolong(ref_.EV.E2) & ~0x7FFFFFFFL) || 3007 c1 & ~0x7FFFFFFFL) 3008 ) 3009 continue; 3010 if (sz == LLONGSIZE && 3011 ((ref_.EV.E2.Eoper == OPconst && 3012 c1 * el_tolong(ref_.EV.E2) & ~0x7FFFFFFFFFFFFFFFL) || 3013 c1 & ~0x7FFFFFFFFFFFFFFFL) 3014 ) 3015 continue; 3016 } 3017 3018 /* If the incr is a decrement, and the relational is < or <=, 3019 * and its unsigned, then don't do it because it could drop below 0. 3020 * https://issues.dlang.org/show_bug.cgi?id=16189 3021 */ 3022 if ((einc.Eoper == OPminass || einc.EV.E2.Eoper == OPconst && el_tolong(einc.EV.E2) < 0) && 3023 (ref_.Eoper == OPlt || ref_.Eoper == OPle) && 3024 (tyuns(ref_.EV.E1.Ety) | tyuns(ref_.EV.E2.Ety))) 3025 continue; 3026 3027 /* If loop started out with a signed conditional that was 3028 * replaced with an unsigned one, don't do it if c2 3029 * is less than 0. 3030 */ 3031 if (ref_.Nflags & NFLtouns && fl.c2.Eoper == OPconst) 3032 { 3033 targ_llong c2 = el_tolong(fl.c2); 3034 if (c2 < 0) 3035 continue; 3036 } 3037 3038 elem *refE2 = el_copytree(ref_.EV.E2); 3039 int refEoper = ref_.Eoper; 3040 3041 /* if c1 < 0 and relop is < <= > >= 3042 then adjust relop as if both sides were multiplied 3043 by -1 3044 */ 3045 if (!tyuns(ty) && 3046 (tyintegral(ty) && el_tolong(fl.c1) < 0 || 3047 tyfloating(ty) && el_toldoubled(fl.c1) < 0.0)) 3048 refEoper = swaprel(refEoper); 3049 3050 /* Replace (X relop e) with (X relop (short)e) 3051 if T is 1 word but e is 2 3052 */ 3053 if (tysize(flty) == SHORTSIZE && 3054 tysize(refE2.Ety) == LONGSIZE) 3055 refE2 = el_una(OP32_16,flty,refE2); 3056 3057 /* replace e with e*c1 + c2 */ 3058 elem* C2 = el_copytree(fl.c2); 3059 elem* fofe = el_bin(OPadd,flty, 3060 el_bin(OPmul,refE2.Ety, 3061 refE2, 3062 el_copytree(fl.c1)), 3063 C2); 3064 fofe = doptelem(fofe,GOALvalue | GOALagain); // fold any constants 3065 3066 if (tyuns(flty) && refEoper == OPge && 3067 fofe.Eoper == OPconst && el_allbits(fofe, 0) && 3068 fl.c2.Eoper == OPconst && !el_allbits(fl.c2, 0)) 3069 { 3070 /* Don't do it if replacement will result in 3071 * an unsigned T>=0 which will be an infinite loop. 3072 */ 3073 el_free(fofe); 3074 continue; 3075 } 3076 3077 if (debugc) printf("Eliminating basic IV '%s'\n",X.Sident.ptr); 3078 3079 debug if (debugc) 3080 { 3081 printf("Comparison replaced: "); 3082 WReqn(ref_); 3083 printf(" with "); 3084 } 3085 3086 el_free(ref_.EV.E2); 3087 ref_.EV.E2 = refE2; 3088 ref_.Eoper = cast(ubyte)refEoper; 3089 3090 elimass(einc); // dump the increment elem 3091 3092 // replace X with T 3093 assert(ref_.EV.E1.EV.Voffset == 0); 3094 ref_.EV.E1.EV.Vsym = fl.FLtemp; 3095 ref_.EV.E1.Ety = flty; 3096 ref_.EV.E2 = fofe; 3097 3098 /* If sizes of expression worked out wrong... 3099 Which can happen if we have (int)ptr==e 3100 */ 3101 if (OTbinary(fofe.Eoper)) // if didn't optimize it away 3102 { 3103 const tym_t fofety = fofe.Ety; 3104 const int sz = tysize(fofety); 3105 tym_t ty1 = fofe.EV.E1.Ety; 3106 const tym_t ty2 = fofe.EV.E2.Ety; 3107 /* Sizes of + expression must all be the same */ 3108 if (sz != tysize(ty1) && 3109 sz != tysize(ty2) 3110 ) 3111 { 3112 if (tyuns(fofety)) // if unsigned comparison 3113 ty1 = touns(ty1); /* to unsigned type */ 3114 fofe.Ety = ty1; 3115 ref_.EV.E1.Ety = ty1; 3116 } 3117 } 3118 3119 /* Fix if leaves of compare are TYfptrs and the compare */ 3120 /* operator is < <= > >=. */ 3121 if (ref_.Eoper >= OPle && ref_.Eoper <= OPge && tyfv(ref_.EV.E1.Ety)) 3122 { 3123 assert(tyfv(ref_.EV.E2.Ety)); 3124 ref_.EV.E1 = el_una(OPoffset,TYuint,ref_.EV.E1); 3125 ref_.EV.E2 = el_una(OPoffset,TYuint,fofe); 3126 } 3127 3128 debug if (debugc) 3129 { 3130 WReqn(ref_); 3131 printf("\n"); 3132 } 3133 3134 go.changes++; 3135 doflow = true; /* redo flow analysis */ 3136 3137 /* if X is live on entry to any successor S outside loop */ 3138 /* prepend elem X=(T-c2)/c1 to S.Belem */ 3139 3140 for (uint i = 0; (i = cast(uint) vec_index(i, l.Lexit)) < dfo.length; ++i) // for each exit block 3141 { 3142 elem *ne; 3143 block *b; 3144 3145 foreach (bl; ListRange(dfo[i].Bsucc)) 3146 { /* for each successor */ 3147 b = list_block(bl); 3148 if (vec_testbit(b.Bdfoidx,l.Lloop)) 3149 continue; /* inside loop */ 3150 if (!vec_testbit(X.Ssymnum,b.Binlv)) 3151 continue; /* not live */ 3152 3153 C2 = el_copytree(fl.c2); 3154 ne = el_bin(OPmin,ty, 3155 el_var(fl.FLtemp), 3156 C2); 3157 if (tybasic(ne.EV.E1.Ety) == TYfptr && 3158 tybasic(ne.EV.E2.Ety) == TYfptr) 3159 { 3160 ne.Ety = I64 ? TYllong : TYint; 3161 if (tylong(ty) && _tysize[TYint] == 2) 3162 ne = el_una(OPs16_32,ty,ne); 3163 } 3164 3165 ne = el_bin(OPeq,X.ty(), 3166 el_var(X), 3167 el_bin(OPdiv,ne.Ety, 3168 ne, 3169 el_copytree(fl.c1))); 3170 3171 debug if (debugc) 3172 { 3173 printf("Adding ("); 3174 WReqn(ne); 3175 printf(") to exit block B%d\n",b.Bdfoidx); 3176 //elem_print(ne); 3177 } 3178 3179 /* We have to add a new block if there is */ 3180 /* more than one predecessor to b. */ 3181 if (list_next(b.Bpred)) 3182 { 3183 block *bn = block_calloc(); 3184 bn.Btry = b.Btry; 3185 numblks++; 3186 assert(numblks <= maxblks); 3187 bn.BC = BCgoto; 3188 bn.Bnext = dfo[i].Bnext; 3189 dfo[i].Bnext = bn; 3190 list_append(&(bn.Bsucc),b); 3191 list_append(&(bn.Bpred),dfo[i]); 3192 bl.ptr = cast(void *)bn; 3193 foreach (bl2; ListRange(b.Bpred)) 3194 if (list_block(bl2) == dfo[i]) 3195 { 3196 bl2.ptr = cast(void *)bn; 3197 goto L2; 3198 } 3199 assert(0); 3200 L2: 3201 b = bn; 3202 addblk = true; 3203 } 3204 3205 if (b.Belem) 3206 b.Belem = 3207 el_bin(OPcomma,b.Belem.Ety, 3208 ne,b.Belem); 3209 else 3210 b.Belem = ne; 3211 go.changes++; 3212 doflow = true; /* redo flow analysis */ 3213 } /* for each successor */ 3214 } /* foreach exit block */ 3215 if (addblk) 3216 return; 3217 } 3218 else if (refcount == 0) /* if no uses of IV in loop */ 3219 { 3220 /* Eliminate the basic IV if it is not live on any successor */ 3221 for (uint i = 0; (i = cast(uint) vec_index(i, l.Lexit)) < dfo.length; ++i) // for each exit block 3222 { 3223 foreach (bl; ListRange(dfo[i].Bsucc)) 3224 { /* for each successor */ 3225 block *b = list_block(bl); 3226 if (vec_testbit(b.Bdfoidx,l.Lloop)) 3227 continue; /* inside loop */ 3228 if (vec_testbit(X.Ssymnum,b.Binlv)) 3229 goto L1; /* live */ 3230 } 3231 } 3232 3233 if (debugc) printf("No uses, eliminating basic IV '%s' (%p)\n",X.Sident.ptr,X); 3234 3235 /* Dump the increment elem */ 3236 /* (Replace it with an OPconst that only serves as a */ 3237 /* placeholder in the tree) */ 3238 *(biv.IVincr) = el_selecte2(*(biv.IVincr)); 3239 3240 go.changes++; 3241 doflow = true; /* redo flow analysis */ 3242 L1: 3243 } 3244 } /* for */ 3245 } 3246 3247 3248 /*********************** 3249 * Eliminate opeq IVs that are not used outside the loop. 3250 */ 3251 3252 private void elimopeqs(loop *l) 3253 { 3254 elem **pref; 3255 Symbol *X; 3256 int refcount; 3257 3258 if (debugc) printf("elimopeqs(%p)\n",l); 3259 for (Iv *biv = l.Lopeqlist; biv; biv = biv.IVnext) // for each opeq IV 3260 { 3261 // Can't eliminate this basic IV if we have a goal for the 3262 // increment elem. 3263 // Be careful about Nflags being in a union... 3264 if (!((*biv.IVincr).Nflags & NFLnogoal)) 3265 continue; 3266 3267 X = biv.IVbasic; 3268 assert(symbol_isintab(X)); 3269 pref = onlyref(X,l,*biv.IVincr,&refcount); 3270 3271 // if only ref of X is of the form (X) or (X relop e) or (e relop X) 3272 if (pref != null && refcount <= 1) 3273 { } 3274 else if (refcount == 0) // if no uses of IV in loop 3275 { // Eliminate the basic IV if it is not live on any successor 3276 uint i; 3277 for (i = 0; (i = cast(uint) vec_index(i, l.Lexit)) < dfo.length; ++i) // for each exit block 3278 { 3279 foreach (bl; ListRange(dfo[i].Bsucc)) 3280 { // for each successor 3281 block *b = list_block(bl); 3282 if (vec_testbit(b.Bdfoidx,l.Lloop)) 3283 continue; // inside loop 3284 if (vec_testbit(X.Ssymnum,b.Binlv)) 3285 goto L1; // live 3286 } 3287 } 3288 3289 if (debugc) printf("No uses, eliminating opeq IV '%s' (%p)\n",X.Sident.ptr,X); 3290 3291 // Dump the increment elem 3292 // (Replace it with an OPconst that only serves as a 3293 // placeholder in the tree) 3294 *(biv.IVincr) = el_selecte2(*(biv.IVincr)); 3295 3296 go.changes++; 3297 doflow = true; // redo flow analysis 3298 L1: 3299 } 3300 } 3301 } 3302 3303 /************************** 3304 * Find simplest elem in family. 3305 * Input: 3306 * tym type of basic IV 3307 * Return null if none found. 3308 */ 3309 3310 private famlist * simfl(famlist *fl,tym_t tym) 3311 { 3312 famlist *sofar; 3313 3314 assert(fl); 3315 sofar = null; 3316 for (; fl; fl = fl.FLnext) 3317 { 3318 if (fl.FLtemp == FLELIM) /* no variable, so skip it */ 3319 continue; 3320 /* If size of replacement is less than size of biv, we could */ 3321 /* be in trouble due to loss of precision. */ 3322 if (size(fl.FLtemp.ty()) < size(tym)) 3323 continue; 3324 sofar = flcmp(sofar,fl); /* pick simplest */ 3325 } 3326 return sofar; 3327 } 3328 3329 /************************** 3330 * Return simpler of two family elems. 3331 * There is room for improvement, namely if 3332 * f1.c1 = 2, f2.c1 = 27 3333 * then pick f1 because it is a shift. 3334 */ 3335 3336 private famlist * flcmp(famlist *f1,famlist *f2) 3337 { 3338 tym_t ty; 3339 eve *t1; 3340 eve *t2; 3341 3342 assert(f2); 3343 if (!f1) 3344 goto Lf2; 3345 t1 = &(f1.c1.EV); 3346 t2 = &(f2.c1.EV); 3347 ty = (*f1.FLpelem).Ety; /* type of elem */ 3348 3349 static if (0) 3350 { 3351 printf("f1: c1 = %d, c2 = %d\n",t1.Vshort,f1.c2.EV.Vshort); 3352 printf("f2: c1 = %d, c2 = %d\n",t2.Vshort,f2.c2.EV.Vshort); 3353 WRTYxx((*f1.FLpelem).Ety); 3354 WRTYxx((*f2.FLpelem).Ety); 3355 } 3356 3357 /* Wimp out and just pick f1 if the types don't match */ 3358 if (tysize(ty) == tysize((*f2.FLpelem).Ety)) 3359 { 3360 switch (tybasic(ty)) 3361 { case TYbool: 3362 case TYchar: 3363 case TYschar: 3364 case TYuchar: 3365 if (t2.Vuchar == 1 || 3366 t1.Vuchar != 1 && f2.c2.EV.Vuchar == 0) 3367 goto Lf2; 3368 break; 3369 3370 case TYshort: 3371 case TYushort: 3372 case TYchar16: 3373 case TYwchar_t: // BUG: what about 4 byte wchar_t's? 3374 case_short: 3375 if (t2.Vshort == 1 || 3376 t1.Vshort != 1 && f2.c2.EV.Vshort == 0) 3377 goto Lf2; 3378 break; 3379 3380 case TYsptr: 3381 case TYcptr: 3382 case TYnptr: // BUG: 64 bit pointers? 3383 case TYimmutPtr: 3384 case TYsharePtr: 3385 case TYrestrictPtr: 3386 case TYfgPtr: 3387 case TYnullptr: 3388 case TYint: 3389 case TYuint: 3390 if (_tysize[TYint] == SHORTSIZE) 3391 goto case_short; 3392 else 3393 goto case_long; 3394 3395 case TYlong: 3396 case TYulong: 3397 case TYdchar: 3398 case TYfptr: 3399 case TYvptr: 3400 case TYhptr: 3401 case_long: 3402 if (t2.Vlong == 1 || 3403 t1.Vlong != 1 && f2.c2.EV.Vlong == 0) 3404 goto Lf2; 3405 break; 3406 3407 case TYfloat: 3408 if (t2.Vfloat == 1 || 3409 t1.Vfloat != 1 && f2.c2.EV.Vfloat == 0) 3410 goto Lf2; 3411 break; 3412 3413 case TYdouble: 3414 case TYdouble_alias: 3415 if (t2.Vdouble == 1.0 || 3416 t1.Vdouble != 1.0 && f2.c2.EV.Vdouble == 0) 3417 goto Lf2; 3418 break; 3419 3420 case TYldouble: 3421 if (t2.Vldouble == 1.0 || 3422 t1.Vldouble != 1.0 && f2.c2.EV.Vldouble == 0) 3423 goto Lf2; 3424 break; 3425 3426 case TYllong: 3427 case TYullong: 3428 if (t2.Vllong == 1 || 3429 t1.Vllong != 1 && f2.c2.EV.Vllong == 0) 3430 goto Lf2; 3431 break; 3432 3433 default: 3434 assert(0); 3435 } 3436 } 3437 //printf("picking f1\n"); 3438 return f1; 3439 3440 Lf2: 3441 //printf("picking f2\n"); 3442 return f2; 3443 } 3444 3445 /************************************ 3446 * Input: 3447 * x basic IV symbol 3448 * incn increment elem for basic IV X. 3449 * Output: 3450 * *prefcount # of references to X other than the increment elem 3451 * Returns: 3452 * If ref of X in loop l is of the form (X relop e) or (e relop X) 3453 * Return the relop elem 3454 * Else 3455 * Return null 3456 */ 3457 3458 private __gshared 3459 { 3460 int count; 3461 elem **nd; 3462 elem *sincn; 3463 Symbol *X; 3464 } 3465 3466 private elem ** onlyref(Symbol *x,loop *l,elem *incn,int *prefcount) 3467 { 3468 uint i; 3469 3470 //printf("onlyref('%s')\n", x.Sident.ptr); 3471 X = x; /* save some parameter passing */ 3472 assert(symbol_isintab(x)); 3473 sincn = incn; 3474 3475 debug 3476 if (!(X.Ssymnum < globsym.top && l && incn)) 3477 printf("X = %d, globsym.top = %d, l = %p, incn = %p\n",X.Ssymnum,globsym.top,l,incn); 3478 3479 assert(X.Ssymnum < globsym.top && l && incn); 3480 count = 0; 3481 nd = null; 3482 for (i = 0; (i = cast(uint) vec_index(i, l.Lloop)) < dfo.length; ++i) // for each block in loop 3483 { 3484 block *b; 3485 3486 b = dfo[i]; 3487 if (b.Belem) 3488 { 3489 countrefs(&b.Belem,b.BC == BCiftrue); 3490 } 3491 } 3492 3493 static if (0) 3494 { 3495 printf("count = %d, nd = ("); 3496 if (nd) WReqn(*nd); 3497 printf(")\n"); 3498 } 3499 3500 *prefcount = count; 3501 return nd; 3502 } 3503 3504 3505 /****************************** 3506 * Count elems of the form (X relop e) or (e relop X). 3507 * Do not count the node if it is the increment node (sincn). 3508 * Input: 3509 * flag: true if block wants to test the elem 3510 */ 3511 3512 private void countrefs(elem **pn,bool flag) 3513 { 3514 elem *n = *pn; 3515 3516 assert(n); 3517 if (n == sincn) /* if it is the increment elem */ 3518 { 3519 if (OTbinary(n.Eoper)) 3520 countrefs(&n.EV.E2, false); 3521 return; // don't count lvalue 3522 } 3523 if (OTunary(n.Eoper)) 3524 countrefs(&n.EV.E1,false); 3525 else if (OTbinary(n.Eoper)) 3526 { 3527 if (OTrel(n.Eoper)) 3528 { 3529 elem *e1 = n.EV.E1; 3530 3531 assert(e1.Eoper != OPcomma); 3532 if (e1 == sincn && 3533 (e1.Eoper == OPeq || OTopeq(e1.Eoper))) 3534 goto L1; 3535 3536 /* Check both subtrees to see if n is the comparison node, 3537 * that is, if X is a leaf of the comparison. 3538 */ 3539 if (e1.Eoper == OPvar && e1.EV.Vsym == X && !countrefs2(n.EV.E2) || 3540 n.EV.E2.Eoper == OPvar && n.EV.E2.EV.Vsym == X && !countrefs2(e1)) 3541 nd = pn; /* found the relop node */ 3542 } 3543 L1: 3544 countrefs(&n.EV.E1,false); 3545 countrefs(&n.EV.E2,(flag && n.Eoper == OPcomma)); 3546 } 3547 else if ((n.Eoper == OPvar || n.Eoper == OPrelconst) && n.EV.Vsym == X) 3548 { 3549 if (flag) 3550 nd = pn; /* comparing it with 0 */ 3551 count++; /* found another reference */ 3552 } 3553 } 3554 3555 /******************************* 3556 * Count number of times symbol X appears in elem tree e. 3557 */ 3558 3559 private int countrefs2(elem *e) 3560 { 3561 elem_debug(e); 3562 while (OTunary(e.Eoper)) 3563 e = e.EV.E1; 3564 if (OTbinary(e.Eoper)) 3565 return countrefs2(e.EV.E1) + countrefs2(e.EV.E2); 3566 return ((e.Eoper == OPvar || e.Eoper == OPrelconst) && 3567 e.EV.Vsym == X); 3568 } 3569 3570 /**************************** 3571 * Eliminate some special cases. 3572 */ 3573 3574 private void elimspec(loop *l) 3575 { 3576 uint i; 3577 3578 for (i = 0; (i = cast(uint) vec_index(i, l.Lloop)) < dfo.length; ++i) // for each block in loop 3579 { 3580 block *b; 3581 3582 b = dfo[i]; 3583 if (b.Belem) 3584 elimspecwalk(&b.Belem); 3585 } 3586 } 3587 3588 3589 /****************************** 3590 */ 3591 3592 private void elimspecwalk(elem **pn) 3593 { 3594 elem *n; 3595 3596 n = *pn; 3597 assert(n); 3598 if (OTunary(n.Eoper)) 3599 elimspecwalk(&n.EV.E1); 3600 else if (OTbinary(n.Eoper)) 3601 { 3602 elimspecwalk(&n.EV.E1); 3603 elimspecwalk(&n.EV.E2); 3604 if (OTrel(n.Eoper)) 3605 { 3606 elem *e1 = n.EV.E1; 3607 3608 /* Replace ((e1,e2) rel e3) with (e1,(e2 rel e3). 3609 * This will reduce the number of cases for elimbasivs(). 3610 * Don't do equivalent with (e1 rel (e2,e3)) because 3611 * of potential side effects in e1. 3612 */ 3613 if (e1.Eoper == OPcomma) 3614 { 3615 elem *e; 3616 3617 debug if (debugc) 3618 { printf("3rewriting ("); WReqn(n); printf(")\n"); } 3619 3620 e = n.EV.E2; 3621 n.EV.E2 = e1; 3622 n.EV.E1 = n.EV.E2.EV.E1; 3623 n.EV.E2.EV.E1 = n.EV.E2.EV.E2; 3624 n.EV.E2.EV.E2 = e; 3625 n.EV.E2.Eoper = n.Eoper; 3626 n.EV.E2.Ety = n.Ety; 3627 n.Eoper = OPcomma; 3628 3629 go.changes++; 3630 doflow = true; 3631 3632 elimspecwalk(&n.EV.E1); 3633 elimspecwalk(&n.EV.E2); 3634 } 3635 3636 /* Rewrite ((X op= e2) rel e3) into ((X op= e2),(X rel e3)) 3637 * Rewrite ((X ++ e2) rel e3) into ((X += e2),(X-e2 rel e3)) 3638 * so that the op= will not have a goal, so elimbasivs() 3639 * will work on it. 3640 */ 3641 if ((OTopeq(e1.Eoper) 3642 || OTpost(e1.Eoper) 3643 ) && 3644 !el_sideeffect(e1.EV.E1)) 3645 { 3646 elem *e; 3647 OPER op; 3648 3649 debug if (debugc) 3650 { printf("4rewriting ("); WReqn(n); printf(")\n"); } 3651 3652 e = el_calloc(); 3653 el_copy(e,n); 3654 e.EV.E1 = el_copytree(e1.EV.E1); 3655 e.EV.E1.Ety = n.EV.E1.Ety; 3656 n.EV.E2 = e; 3657 switch (e1.Eoper) 3658 { 3659 case OPpostinc: 3660 e1.Eoper = OPaddass; 3661 op = OPmin; 3662 goto L3; 3663 3664 case OPpostdec: 3665 e1.Eoper = OPminass; 3666 op = OPadd; 3667 L3: e.EV.E1 = el_bin(op,e.EV.E1.Ety,e.EV.E1,el_copytree(e1.EV.E2)); 3668 break; 3669 3670 default: 3671 break; 3672 } 3673 /* increment node is now guaranteed to have no goal */ 3674 e1.Nflags |= NFLnogoal; 3675 n.Eoper = OPcomma; 3676 //go.changes++; 3677 doflow = true; 3678 3679 elimspecwalk(&n.EV.E1); 3680 elimspecwalk(&n.EV.E2); 3681 } 3682 } 3683 } 3684 } 3685 3686 /********************************* 3687 * Unroll loop if possible. 3688 * Params: 3689 * l = loop to unroll 3690 * Returns: 3691 * true if loop was unrolled 3692 */ 3693 3694 struct UnrollWalker 3695 { 3696 nothrow: 3697 uint defnum; 3698 int state; 3699 Symbol *v; 3700 targ_llong increment; 3701 3702 /*********************************** 3703 * Walk e in execution order, fixing it according to state. 3704 * state == 0: when rdinc is found, remove it, advance to state 1 3705 * state == 1: continue, replacing instances of v with v+increment, 3706 * when second rdinc is found, advance to state 2 3707 * state == 2: continue 3708 */ 3709 3710 void walker(elem *e) 3711 { 3712 assert(e); 3713 const op = e.Eoper; 3714 if (ERTOL(e)) 3715 { 3716 if (e.Edef != defnum) 3717 { 3718 walker(e.EV.E2); 3719 walker(e.EV.E1); 3720 } 3721 } 3722 else if (OTbinary(op)) 3723 { 3724 if (e.Edef != defnum) 3725 { 3726 walker(e.EV.E1); 3727 walker(e.EV.E2); 3728 } 3729 } 3730 else if (OTunary(op)) 3731 { 3732 assert(e.Edef != defnum); 3733 walker(e.EV.E1); 3734 } 3735 else if (op == OPvar && 3736 state == 1 && 3737 e.EV.Vsym == v) 3738 { 3739 // overwrite e with (v+increment) 3740 elem *e1 = el_calloc(); 3741 el_copy(e1,e); 3742 e.Eoper = OPadd; 3743 e.EV.E1 = e1; 3744 e.EV.E2 = el_long(e.Ety, increment); 3745 } 3746 if (OTdef(op) && e.Edef == defnum) 3747 { 3748 switch (state) 3749 { 3750 case 0: 3751 el_free(e.EV.E1); 3752 el_free(e.EV.E2); 3753 e.Eoper = OPconst; 3754 e.EV.Vllong = 0; 3755 break; 3756 3757 case 1: 3758 break; 3759 3760 default: 3761 assert(0); 3762 } 3763 ++state; 3764 } 3765 } 3766 } 3767 3768 3769 bool loopunroll(loop *l) 3770 { 3771 const bool log = false; 3772 if (log) printf("loopunroll(%p)\n", l); 3773 3774 /* Do not repeatedly unroll the same loop, 3775 * or waste time attempting to 3776 */ 3777 if (l.Lhead.Bflags & BFLnounroll) 3778 return false; 3779 l.Lhead.Bflags |= BFLnounroll; 3780 3781 /* For simplification, only unroll loops that consist only 3782 * of a head and tail, and the tail is the exit block. 3783 */ 3784 int numblocks = 0; 3785 int i; 3786 for (i = 0; (i = cast(uint) vec_index(i, l.Lloop)) < dfo.length; ++i) // for each block in loop 3787 ++numblocks; 3788 if (numblocks != 2) 3789 { 3790 if (log) printf("\tnot 2 blocks\n"); 3791 return false; 3792 } 3793 assert(l.Lhead != l.Ltail); 3794 3795 /* tail must be the sole exit block 3796 */ 3797 if (vec_testbit(l.Lhead.Bdfoidx, l.Lexit) || 3798 !vec_testbit(l.Ltail.Bdfoidx, l.Lexit)) 3799 { 3800 if (log) printf("\ttail not sole exit block\n"); 3801 return false; 3802 } 3803 3804 elem *ehead = l.Lhead.Belem; 3805 elem *etail = l.Ltail.Belem; 3806 3807 if (log) 3808 { 3809 printf("Unroll candidate:\n"); 3810 printf(" head:\t"); WReqn(l.Lhead.Belem); printf("\n"); 3811 printf(" tail:\t"); WReqn(l.Ltail.Belem); printf("\n"); 3812 } 3813 3814 /* Tail must be of the form: (v < c) where v is an unsigned integer 3815 */ 3816 if (etail.Eoper != OPlt || 3817 etail.EV.E1.Eoper != OPvar || 3818 etail.EV.E2.Eoper != OPconst) 3819 { 3820 if (log) printf("\tnot (v < c)\n"); 3821 return false; 3822 } 3823 3824 elem *e1 = etail.EV.E1; 3825 elem *e2 = etail.EV.E2; 3826 3827 if (!tyintegral(e1.Ety) || 3828 tysize(e1.Ety) > targ_llong.sizeof || 3829 !(tyuns(e1.Ety) || tyuns(e2.Ety))) 3830 { 3831 if (log) printf("\tnot (integral unsigned)\n"); 3832 return false; 3833 } 3834 3835 // extern int el_length(elem *e); 3836 int cost = el_length(ehead); 3837 //printf("test4 cost: %d\n", cost); 3838 3839 if (cost > 100) 3840 { 3841 if (log) printf("\tcost %d\n", cost); 3842 return false; 3843 } 3844 3845 Symbol* v = e1.EV.Vsym; 3846 3847 // RD info is only reliable for registers and autos 3848 if (!(sytab[v.Sclass] & SCRD) || !(v.Sflags & SFLunambig)) 3849 { 3850 if (log) printf("\tnot SCRD\n"); 3851 return false; 3852 } 3853 3854 /* Find the initial, increment elem, and final value of s 3855 */ 3856 elem *einitial; 3857 elem *eincrement; 3858 if (!findloopparameters(etail, einitial, eincrement)) 3859 { 3860 if (log) printf("\tnot findloopparameters()\n"); 3861 return false; 3862 } 3863 3864 targ_llong initial = el_tolong(einitial.EV.E2); 3865 targ_llong increment = el_tolong(eincrement.EV.E2); 3866 if (eincrement.Eoper == OPpostdec || eincrement.Eoper == OPminass) 3867 increment = -increment; 3868 targ_llong final_ = el_tolong(e2); 3869 3870 if (log) printf("initial = %lld, increment = %lld, final = %lld\n",cast(long)initial,cast(long)increment,cast(long)final_); 3871 3872 if (initial < 0 || 3873 final_ < initial || 3874 increment <= 0 || 3875 (final_ - initial) % increment) 3876 { 3877 if (log) printf("\tnot (evenly divisible)\n"); 3878 return false; 3879 } 3880 3881 /* If loop would only execute once anyway, just remove the test at the end 3882 */ 3883 if (initial + increment == final_) 3884 { 3885 if (log) printf("\tjust once\n"); 3886 etail.Eoper = OPcomma; 3887 e2.EV.Vllong = 0; 3888 e2.Ety = etail.Ety; 3889 return false; 3890 } 3891 3892 /* Unroll once 3893 */ 3894 if ((final_ - initial) % 2) 3895 { 3896 if (log) printf("\tnot (divisible by 2)\n"); 3897 return false; 3898 } 3899 3900 if (log) printf("Unrolling starting\n"); 3901 3902 // Double the increment 3903 eincrement.EV.E2.EV.Vllong *= 2; 3904 //printf(" 4head:\t"); WReqn(l.Lhead.Belem); printf("\n"); 3905 3906 elem *esecond = el_copytree(ehead); 3907 elem *e = el_combine(ehead, esecond); 3908 3909 /* Walk e in execution order. 3910 * When eincrement is found, remove it. 3911 * Continue, replacing instances of `v` with `v+increment` 3912 * When second eincrement is found, stop. 3913 */ 3914 UnrollWalker uw; 3915 uw.defnum = eincrement.Edef; 3916 uw.state = 0; 3917 uw.v = v; 3918 uw.increment = increment; 3919 uw.walker(e); 3920 assert(uw.state == 2); 3921 3922 l.Lhead.Belem = e; 3923 3924 /* If unrolled loop would only execute once anyway, just remove the test at the end 3925 */ 3926 if (initial + 2 * increment == final_) 3927 { 3928 if (log) printf("\tjust twice\n"); 3929 etail.Eoper = OPcomma; 3930 e2.EV.Vllong = 0; 3931 e2.Ety = etail.Ety; 3932 } 3933 3934 //WRfunc(); 3935 return true; 3936 } 3937 3938 /****************************** 3939 * Count number of elems in a tree 3940 * Params: 3941 * e = tree 3942 * Returns: 3943 * number of elems in tree 3944 */ 3945 int el_length(elem *e) 3946 { 3947 int n = 0; 3948 while (e) 3949 { 3950 n += 1; 3951 if (!OTleaf(e.Eoper)) 3952 { 3953 n += el_length(e.EV.E2); 3954 e = e.EV.E1; 3955 } 3956 else 3957 break; 3958 } 3959 return n; 3960 } 3961 3962 3963 }