1 /** 2 * Compiler implementation of the 3 * $(LINK2 http://www.dlang.org, D programming language). 4 * 5 * Copyright: Copyright (C) 1985-1998 by Symantec 6 * Copyright (C) 2000-2020 by The D Language Foundation, All Rights Reserved 7 * Authors: $(LINK2 http://www.digitalmars.com, Walter Bright) 8 * License: $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0) 9 * Source: $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/backend/gloop.d, backend/gloop.d) 10 * Coverage: https://codecov.io/gh/dlang/dmd/src/master/src/dmd/backend/gloop.d 11 */ 12 13 14 module dmd.backend.gloop; 15 16 version (SCPP) 17 version = COMPILE; 18 version (MARS) 19 version = COMPILE; 20 21 version (COMPILE) 22 { 23 24 import core.stdc.stdio; 25 import core.stdc.stdlib; 26 import core.stdc.string; 27 28 import dmd.backend.cc; 29 import dmd.backend.cdef; 30 import dmd.backend.code_x86; 31 import dmd.backend.evalu8 : el_toldoubled; 32 import dmd.backend.oper; 33 import dmd.backend.global; 34 import dmd.backend.goh; 35 import dmd.backend.el; 36 import dmd.backend.outbuf; 37 import dmd.backend.symtab; 38 import dmd.backend.ty; 39 import dmd.backend.type; 40 41 import dmd.backend.barray; 42 import dmd.backend.dlist; 43 import dmd.backend.dvec; 44 import dmd.backend.mem; 45 46 nothrow: 47 48 char symbol_isintab(Symbol *s) { return sytab[s.Sclass] & SCSS; } 49 50 extern (C++): 51 52 bool findloopparameters(elem* erel, ref elem* rdeq, ref elem* rdinc); 53 54 alias Loops = Rarray!loop; 55 56 57 /********************************* 58 * Loop data structure. 59 */ 60 61 struct loop 62 { 63 nothrow: 64 vec_t Lloop; // Vector of blocks in this loop 65 vec_t Lexit; // Vector of exit blocks of loop 66 block *Lhead; // Pointer to header of loop 67 block *Ltail; // Pointer to tail 68 block *Lpreheader; // Pointer to preheader (if any) 69 Barray!(elem*) Llis; // loop invariant elems moved to Lpreheader, so 70 // redundant temporaries aren't created 71 Rarray!Iv Livlist; // basic induction variables 72 Rarray!Iv Lopeqlist; // list of other op= variables 73 74 /************************* 75 * Reset memory so this allocation can be re-used. 76 */ 77 void reset() 78 { 79 vec_free(Lloop); 80 vec_free(Lexit); 81 82 foreach (ref iv; Livlist) 83 iv.reset(); 84 foreach (ref iv; Lopeqlist) 85 iv.reset(); 86 87 Llis.reset(); 88 Livlist.reset(); 89 Lopeqlist.reset(); 90 } 91 92 /*********************** 93 * Write loop. 94 */ 95 96 void print() 97 { 98 debug 99 { 100 loop *l = &this; 101 printf("loop %p\n", l); 102 printf("\thead: B%d, tail: B%d, prehead: B%d\n",l.Lhead.Bdfoidx, 103 l.Ltail.Bdfoidx,(l.Lpreheader ) ? l.Lpreheader.Bdfoidx 104 : cast(uint)-1); 105 printf("\tLloop "); vec_println(l.Lloop); 106 printf("\tLexit "); vec_println(l.Lexit); 107 } 108 } 109 } 110 111 struct famlist 112 { 113 nothrow: 114 elem **FLpelem; /* parent of elem in the family */ 115 elem *c1; 116 elem *c2; // c1*(basic IV) + c2 117 Symbol *FLtemp; // symbol index of temporary (FLELIM if */ 118 /* this entry has no temporary) */ 119 tym_t FLty; /* type of this induction variable */ 120 tym_t FLivty; /* type of the basic IV elem (which is */ 121 /* not necessarilly the type of the IV */ 122 /* elem!) */ 123 124 void reset() 125 { 126 el_free(c1); 127 el_free(c2); 128 } 129 130 void print() const 131 { 132 debug 133 { 134 printf("famlist:\n"); 135 printf("*FLpelem:\n"); 136 elem_print(*FLpelem); 137 printf("c1:"); 138 elem_print(c1); 139 printf("c2:"); 140 elem_print(c2); 141 printf("FLty = "); WRTYxx(FLty); 142 printf("\nFLivty = "); WRTYxx(FLivty); 143 printf("\n"); 144 } 145 } 146 } 147 148 enum FLELIM = cast(Symbol *)-1; 149 150 struct Iv 151 { 152 nothrow: 153 Symbol *IVbasic; // symbol of basic IV 154 elem **IVincr; // pointer to parent of IV increment elem 155 Barray!famlist IVfamily; // variables in this family 156 157 void reset() 158 { 159 foreach (ref fl; IVfamily) 160 { 161 fl.reset(); 162 } 163 IVfamily.reset(); 164 } 165 166 void print() const 167 { 168 debug 169 { 170 printf("IV: '%s'\n",IVbasic.Sident.ptr); 171 printf("*IVincr:\n"); 172 elem_print(*IVincr); 173 } 174 } 175 } 176 177 178 private __gshared bool addblk; /* if TRUE, then we added a block */ 179 180 /* is elem loop invariant? */ 181 int isLI(const elem* n) { return n.Nflags & NFLli; } 182 183 /* make elem loop invariant */ 184 void makeLI(elem* n) { n.Nflags |= NFLli; } 185 186 /****************************** 187 * Only variables that could only be unambiguously defined 188 * are candidates for loop invariant removal and induction 189 * variables. 190 * This means only variables that have the SFLunambig flag 191 * set for them. 192 * Doing this will still cover 90% (I hope) of the cases, and 193 * is a lot faster to compute. 194 */ 195 196 /************* 197 * Free loops. 198 */ 199 200 private void freeloop(ref Loops loops) 201 { 202 foreach (ref loop; loops) 203 loop.reset(); 204 loops.reset(); 205 } 206 207 208 /********************************** 209 * Initialize block information. 210 * Returns: 211 * !=0 contains BCasm block 212 */ 213 214 int blockinit() 215 { 216 bool hasasm = false; 217 218 assert(dfo); 219 uint i = 0; 220 foreach (b; BlockRange(startblock)) 221 { 222 debug /* check integrity of Bpred and Bsucc */ 223 L1: 224 foreach (blp; ListRange(b.Bpred)) 225 { 226 foreach (bls; ListRange(list_block(blp).Bsucc)) 227 if (list_block(bls) == b) 228 continue L1; 229 assert(0); 230 } 231 232 ++i; 233 if (b.BC == BCasm) 234 hasasm = true; 235 /* compute number of blocks */ 236 } 237 foreach (j, b; dfo[]) 238 { 239 assert(b.Bdfoidx == j); 240 b.Bdom = vec_realloc(b.Bdom, dfo.length); /* alloc Bdom vectors */ 241 vec_clear(b.Bdom); 242 } 243 return hasasm; 244 } 245 246 /**************************************** 247 * Compute dominators (Bdom) for each block. 248 * See Aho & Ullman Fig. 13.5. 249 * Note that flow graph is reducible if there is only one 250 * pass through the loop. 251 * Input: 252 * dfo[] 253 * Output: 254 * fills in the Bdom vector for each block 255 */ 256 257 void compdom() 258 { 259 compdom(dfo[]); 260 } 261 262 private extern (D) void compdom(block*[] dfo) 263 { 264 assert(dfo.length); 265 block* sb = dfo[0]; // starting block 266 267 vec_clear(sb.Bdom); 268 vec_setbit(0,sb.Bdom); // starting block only doms itself 269 foreach (b; dfo) // for all except startblock 270 { 271 if (b != sb) 272 vec_set(b.Bdom); // dominate all blocks 273 } 274 275 vec_t t1 = vec_calloc(vec_numbits(sb.Bdom)); // allocate a temporary 276 uint cntr = 0; // # of times thru loop 277 bool chgs; 278 do 279 { 280 chgs = false; 281 foreach (i, b; dfo) // for each block in dfo[] 282 { 283 if (i == 0) 284 continue; // except startblock 285 if (b.Bpred) // if there are predecessors 286 { 287 vec_set(t1); 288 foreach (bl; ListRange(b.Bpred)) 289 { 290 vec_andass(t1,list_block(bl).Bdom); 291 } 292 } 293 else 294 vec_clear(t1); // no predecessors to dominate 295 vec_setbit(i,t1); // each block doms itself 296 if (chgs) 297 vec_copy(b.Bdom,t1); 298 else if (!vec_equal(b.Bdom,t1)) // if any changes 299 { 300 vec_copy(b.Bdom,t1); 301 chgs = true; 302 } 303 } 304 cntr++; 305 assert(cntr < 50); // should have converged by now 306 } while (chgs); 307 vec_free(t1); 308 309 debug if (debugc) 310 { 311 printf("Flow graph is%s reducible\n", cntr <= 2 ? "".ptr : " not".ptr); 312 } 313 } 314 315 /*************************** 316 * Return !=0 if block A dominates block B. 317 */ 318 319 bool dom(const block* A, const block* B) 320 { 321 assert(A && B && dfo && dfo[A.Bdfoidx] == A); 322 return vec_testbit(A.Bdfoidx,B.Bdom) != 0; 323 } 324 325 /********************** 326 * Find all the loops. 327 */ 328 329 private extern (D) void findloops(block*[] dfo, ref Loops loops) 330 { 331 freeloop(loops); 332 333 //printf("findloops()\n"); 334 foreach (b; dfo) 335 b.Bweight = 1; // reset Bweights 336 foreach_reverse (b; dfo) // for each block (note reverse 337 // dfo order, so most nested 338 // loops are found first) 339 { 340 assert(b); 341 foreach (bl; ListRange(b.Bsucc)) 342 { 343 block *s = list_block(bl); // each successor s to b 344 assert(s); 345 if (dom(s,b)) // if s dominates b 346 buildloop(loops, s, b); // we found a loop 347 } 348 } 349 350 debug if (debugc) 351 { 352 foreach (ref l; loops) 353 l.print(); 354 } 355 } 356 357 /******************************** 358 */ 359 360 private uint loop_weight(uint weight, int factor) pure 361 { 362 // Be careful not to overflow 363 if (weight < 0x1_0000) 364 weight *= 10 * factor; 365 else if (weight < 0x10_0000) 366 weight *= 2 * factor; 367 else 368 weight += factor; 369 assert(cast(int)weight > 0); 370 return weight; 371 } 372 373 /***************************** 374 * Construct natural loop. 375 * Algorithm 13.1 from Aho & Ullman. 376 * Note that head dom tail. 377 */ 378 379 private void buildloop(ref Loops ploops,block *head,block *tail) 380 { 381 loop *l; 382 383 //printf("buildloop()\n"); 384 /* See if this is part of an existing loop. If so, merge the two. */ 385 foreach (ref lp; ploops) 386 if (lp.Lhead == head) /* two loops with same header */ 387 { 388 vec_t v; 389 390 // Calculate loop contents separately so we get the Bweights 391 // done accurately. 392 393 v = vec_calloc(dfo.length); 394 vec_setbit(head.Bdfoidx,v); 395 head.Bweight = loop_weight(head.Bweight, 1); 396 insert(tail,v); 397 398 vec_orass(lp.Lloop,v); // merge into existing loop 399 vec_free(v); 400 401 vec_clear(lp.Lexit); // recompute exit blocks 402 l = &lp; 403 goto L1; 404 } 405 406 /* Allocate loop entry */ 407 l = ploops.push(); 408 409 l.Lloop = vec_calloc(dfo.length); // allocate loop bit vector 410 l.Lexit = vec_calloc(dfo.length); // bit vector for exit blocks 411 l.Lhead = head; 412 l.Ltail = tail; 413 l.Lpreheader = null; 414 415 vec_setbit(head.Bdfoidx,l.Lloop); /* add head to the loop */ 416 head.Bweight = loop_weight(head.Bweight, 2); // *20 usage for loop header 417 418 insert(tail,l.Lloop); /* insert tail in loop */ 419 420 L1: 421 /* Find all the exit blocks (those blocks with 422 * successors outside the loop). 423 */ 424 425 // for each block in this loop 426 for (uint i = 0; (i = cast(uint) vec_index(i, l.Lloop)) < dfo.length; ++i) 427 { 428 if (dfo[i].BC == BCret || dfo[i].BC == BCretexp || dfo[i].BC == BCexit) 429 vec_setbit(i,l.Lexit); /* ret blocks are exit blocks */ 430 else 431 { 432 foreach (bl; ListRange(dfo[i].Bsucc)) 433 if (!vec_testbit(list_block(bl).Bdfoidx,l.Lloop)) 434 { 435 vec_setbit(i,l.Lexit); 436 break; 437 } 438 } 439 } 440 441 /* Find preheader, if any, to the loop. 442 The preheader is a block that has only the head as a successor. 443 All other predecessors of head must be inside the loop. 444 */ 445 l.Lpreheader = null; 446 foreach (bl; ListRange(head.Bpred)) 447 { 448 block *b = list_block(bl); 449 450 if (!vec_testbit(b.Bdfoidx,l.Lloop)) /* if not in loop */ 451 { 452 if (l.Lpreheader) /* if already one */ 453 { 454 l.Lpreheader = null; /* can only be one */ 455 break; 456 } 457 else 458 { 459 if (list_next(b.Bsucc)) // if more than 1 successor 460 break; // b can't be a preheader 461 l.Lpreheader = b; 462 } 463 } 464 } 465 } 466 467 /******************************** 468 * Support routine for buildloop(). 469 * Add a block b and all its predecessors to loop lv. 470 */ 471 472 private void insert(block *b, vec_t lv) 473 { 474 assert(b && lv); 475 if (!vec_testbit(b.Bdfoidx,lv)) /* if block is not in loop */ 476 { 477 vec_setbit(b.Bdfoidx,lv); /* add block to loop */ 478 b.Bweight = loop_weight(b.Bweight,1); // *10 usage count 479 foreach (bl; ListRange(b.Bpred)) 480 insert(list_block(bl),lv); /* insert all its predecessors */ 481 } 482 } 483 484 /************************************** 485 * Perform loop rotations. 486 * Loop starts as: 487 * 488 * prehead 489 * | 490 * v 491 * +->head----> 492 * | | 493 * | v 494 * | body 495 * | | 496 * | v 497 * +--tail 498 * 499 * Two types are done: 500 * 1) Header is moved to be past the tail. 501 * 502 * prehead 503 * | 504 * +---+ 505 * | 506 * | body<-+ 507 * | | | 508 * | v | 509 * | tail | 510 * | | | 511 * | v | 512 * +->head--+ 513 * | 514 * v 515 * 516 * 2) Header is copied past the tail (done only if MFtime is set). 517 * 518 * prehead 519 * | 520 * v 521 * head1-----+ 522 * | | 523 * v | 524 * body<--+ | 525 * | | | 526 * v | | 527 * tail | | 528 * | | | 529 * v | | 530 * head2--+ | 531 * | | 532 * +--------+ 533 * v 534 * 535 * Input: 536 * Loop information (do not depend on the preheader information) 537 * Output: 538 * Revised list of blocks, a new dfo and new loop information 539 * Returns: 540 * true need to recompute loop data 541 */ 542 543 private int looprotate(ref loop l) 544 { 545 block *tail = l.Ltail; 546 block *head = l.Lhead; 547 548 //printf("looprotate(%p)\n",l); 549 550 // Do not rotate loop if: 551 if (head == tail || // loop is only one block big 552 !vec_testbit(head.Bdfoidx,l.Lexit)) // header is not an exit block 553 goto Lret; 554 555 if (//iter != 1 && 556 vec_testbit(tail.Bdfoidx,l.Lexit)) // tail is an exit block 557 goto Lret; 558 559 // Do not rotate if already rotated 560 foreach (b; BlockRange(tail.Bnext)) 561 if (b == head) // if loop already rotated 562 goto Lret; 563 564 if (head.BC == BCtry) 565 goto Lret; 566 if (head.BC == BC_try) 567 goto Lret; 568 569 //if (debugc) { printf("looprotate: "); l.print(); } 570 571 if ((go.mfoptim & MFtime) && head.BC != BCswitch && head.BC != BCasm) 572 { // Duplicate the header past the tail (but doing 573 // switches would be too expensive in terms of code 574 // generated). 575 576 auto head2 = block_calloc(); // create new head block 577 head2.Btry = head.Btry; 578 head2.Bflags = head.Bflags; 579 head.Bflags = BFLnomerg; // move flags over to head2 580 head2.Bflags |= BFLnomerg; 581 head2.BC = head.BC; 582 assert(head2.BC != BCswitch); 583 if (head.Belem) // copy expression tree 584 head2.Belem = el_copytree(head.Belem); 585 head2.Bnext = tail.Bnext; 586 tail.Bnext = head2; 587 588 // pred(head1) = pred(head) outside loop 589 // pred(head2) = pred(head) inside loop 590 list_t *pbln; 591 auto pbl2 = &(head2.Bpred); 592 for (list_t *pbl = &(head.Bpred); *pbl; pbl = pbln) 593 { 594 if (vec_testbit(list_block(*pbl).Bdfoidx, l.Lloop)) 595 { // if this predecessor is inside the loop 596 597 *pbl2 = *pbl; 598 *pbl = list_next(*pbl); 599 pbln = pbl; // don't skip this next one 600 (*pbl2).next = null; 601 auto bsucc = list_block(*pbl2).Bsucc; 602 pbl2 = &((*pbl2).next); 603 foreach (bl; ListRange(bsucc)) 604 if (list_block(bl) == head) 605 { 606 bl.ptr = cast(void *)head2; 607 goto L2; 608 } 609 assert(0); 610 L2: 611 } 612 else 613 pbln = &((*pbl).next); // next predecessor in list 614 } // for each pred(head) 615 616 // succ(head2) = succ(head) 617 foreach (bl; ListRange(head.Bsucc)) 618 { 619 list_append(&(head2.Bsucc),list_block(bl)); 620 list_append(&(list_block(bl).Bpred),head2); 621 } 622 if (debugc) printf("1Rotated loop %p\n", &l); 623 go.changes++; 624 return true; 625 } 626 else if (startblock != head 627 /* This screws up the OPctor/OPdtor sequence for: 628 * struct CString 629 * { CString(); 630 * ~CString(); 631 * int GetLength(); 632 * }; 633 * 634 * void f(void) 635 * { for(;;) 636 * { CString s ; 637 * if(s.GetLength()!=0) 638 * break ; 639 * } 640 * } 641 */ 642 && !(config.flags3 & CFG3eh) 643 ) 644 { // optimize for space 645 // Simply position the header past the tail 646 foreach (b; BlockRange(startblock)) 647 { 648 if (b.Bnext == head) 649 { // found parent b of head 650 b.Bnext = head.Bnext; 651 head.Bnext = tail.Bnext; 652 tail.Bnext = head; 653 if (debugc) printf("2Rotated loop %p\n", &l); 654 go.changes++; 655 return false; 656 } 657 } 658 assert(0); 659 } 660 Lret: 661 return false; 662 } 663 664 private __gshared 665 { 666 int gref; // parameter for markinvar() 667 block *gblock; // parameter for markinvar() 668 vec_t lv; // parameter for markinvar() 669 vec_t gin; // parameter for markinvar() 670 bool doflow; // true if flow analysis has to be redone 671 } 672 673 /********************************* 674 * Loop invariant and induction variable elimination. 675 * Input: 676 * iter which optimization iteration we are on 677 */ 678 679 void loopopt() 680 { 681 __gshared Loops startloop_cache; 682 683 Loops startloop = startloop_cache; 684 685 if (debugc) printf("loopopt()\n"); 686 restart: 687 file_progress(); 688 if (blockinit()) // init block data 689 { 690 findloops(dfo[], startloop); // Compute Bweights 691 freeloop(startloop); // free existing loops 692 startloop_cache = startloop; 693 return; // can't handle ASM blocks 694 } 695 compdom(); // compute dominators 696 findloops(dfo[], startloop); // find the loops 697 698 L3: 699 while (1) 700 { 701 foreach (ref l; startloop) 702 { 703 if (looprotate(l)) // rotate the loop 704 { 705 compdfo(); 706 blockinit(); 707 compdom(); 708 findloops(dfo[], startloop); 709 continue L3; 710 } 711 } 712 break; 713 } 714 715 // Make sure there is a preheader for each loop. 716 717 addblk = false; /* assume no blocks added */ 718 foreach (ref l; startloop) 719 { 720 //if (debugc) l.print(); 721 722 if (!l.Lpreheader) /* if no preheader */ 723 { 724 block *h; 725 block *p; 726 727 if (debugc) printf("Generating preheader for loop\n"); 728 addblk = true; /* add one */ 729 p = block_calloc(); // the preheader 730 h = l.Lhead; /* loop header */ 731 732 /* Find parent of h */ 733 if (h == startblock) 734 startblock = p; 735 else 736 { 737 for (auto ph = startblock; 1; ph = ph.Bnext) 738 { 739 assert(ph); /* should have found it */ 740 if (ph.Bnext == h) 741 { 742 // Link p into block list between ph and h 743 ph.Bnext = p; 744 break; 745 } 746 } 747 } 748 p.Bnext = h; 749 750 l.Lpreheader = p; 751 p.BC = BCgoto; 752 assert(p.Bsucc == null); 753 list_append(&(p.Bsucc),h); /* only successor is h */ 754 p.Btry = h.Btry; 755 756 if (debugc) printf("Adding preheader %p to loop %p\n",p,&l); 757 758 // Move preds of h that aren't in the loop to preds of p 759 for (list_t bl = h.Bpred; bl;) 760 { 761 block *b = list_block(bl); 762 763 if (!vec_testbit (b.Bdfoidx, l.Lloop)) 764 { 765 list_append(&(p.Bpred), b); 766 list_subtract(&(h.Bpred), b); 767 bl = h.Bpred; /* dunno what subtract did */ 768 769 /* Fix up successors of predecessors */ 770 foreach (bls; ListRange(b.Bsucc)) 771 if (list_block(bls) == h) 772 bls.ptr = cast(void *)p; 773 } 774 else 775 bl = list_next(bl); 776 } 777 list_append(&(h.Bpred),p); /* p is a predecessor to h */ 778 } 779 } /* for */ 780 if (addblk) /* if any blocks were added */ 781 { 782 compdfo(); /* compute depth-first order */ 783 blockinit(); 784 compdom(); 785 findloops(dfo[], startloop); // recompute block info 786 addblk = false; 787 } 788 789 /* Do the loop optimizations. 790 */ 791 792 doflow = true; /* do flow analysis */ 793 794 if (go.mfoptim & MFtime) 795 { 796 if (debugc) printf("Starting loop unrolling\n"); 797 L2: 798 while (1) 799 { 800 foreach (ref l; startloop) 801 { 802 if (loopunroll(l)) 803 { 804 compdfo(); // compute depth-first order 805 blockinit(); 806 compdom(); 807 findloops(dfo[], startloop); // recompute block info 808 doflow = true; 809 continue L2; 810 } 811 } 812 break; 813 } 814 } 815 816 /* Note that accessing the loops 817 * starting from startloop will access them in least nested 818 * one first, thus moving LIs out as far as possible 819 */ 820 if (debugc) printf("Starting loop invariants\n"); 821 822 foreach_reverse (ref l; startloop) 823 { 824 //if (debugc) l.print(); 825 826 file_progress(); 827 assert(l.Lpreheader); 828 if (doflow) 829 { 830 flowrd(); /* compute reaching definitions */ 831 flowlv(); /* compute live variables */ 832 flowae(); // compute available expressions 833 doflow = false; /* no need to redo it */ 834 if (go.defnod.length == 0) /* if no definition elems */ 835 break; /* no need to optimize */ 836 } 837 lv = l.Lloop; 838 if (debugc) printf("...Loop %p start...\n",&l); 839 840 /* Unmark all elems in this loop */ 841 for (uint i = 0; (i = cast(uint) vec_index(i, lv)) < dfo.length; ++i) 842 if (dfo[i].Belem) 843 unmarkall(dfo[i].Belem); /* unmark all elems */ 844 845 /* Find & mark all LIs */ 846 gin = vec_clone(l.Lpreheader.Bout); 847 vec_t rd = vec_calloc(go.defnod.length); /* allocate our running RD vector */ 848 for (uint i = 0; (i = cast(uint) vec_index(i, lv)) < dfo.length; ++i) // for each block in loop 849 { 850 block *b = dfo[i]; 851 852 if (debugc) printf("B%d\n",i); 853 if (b.Belem) 854 { 855 vec_copy(rd, b.Binrd); // IN reaching defs 856 static if (0) 857 { 858 printf("i = %d\n",i); 859 { 860 for (int j = 0; j < go.defnod.length; j++) 861 elem_print(go.defnod[j].DNelem); 862 } 863 printf("rd : "); vec_println(rd); 864 } 865 gblock = b; 866 gref = 0; 867 if (b != l.Lhead) 868 gref = 1; 869 markinvar(b.Belem, rd); 870 static if (0) 871 { 872 printf("B%d\n", i); 873 { 874 foreach (j; 0 .. go.defnod.length) 875 { 876 printf(" [%2d] ", j); 877 WReqn(go.defnod[j].DNelem); 878 printf("\n"); 879 } 880 } 881 printf("rd : "); vec_println(rd); 882 printf("Boutrd: "); vec_println(b.Boutrd); 883 } 884 assert(vec_equal(rd, b.Boutrd)); 885 } 886 else 887 assert(vec_equal(b.Binrd, b.Boutrd)); 888 } 889 vec_free(rd); 890 vec_free(gin); 891 892 /* Move loop invariants */ 893 for (uint i = 0; (i = cast(uint) vec_index(i, lv)) < dfo.length; ++i) 894 { 895 uint domexit; // true if this block dominates all 896 // exit blocks of the loop 897 898 for (uint j = 0; (j = cast(uint) vec_index(j, l.Lexit)) < dfo.length; ++j) // for each exit block 899 { 900 if (!vec_testbit (i, dfo[j].Bdom)) 901 { 902 domexit = 0; 903 goto L1; // break if !(i dom j) 904 } 905 } 906 // if i dom (all exit blocks) 907 domexit = 1; 908 L1: 909 if (dfo[i].Belem) 910 { // If there is any hope of making an improvement 911 if (domexit || l.Llis.length) 912 { 913 //if (dfo[i] != l.Lhead) 914 //domexit |= 2; 915 movelis(dfo[i].Belem, dfo[i], l, domexit); 916 } 917 } 918 } 919 if (debugc) printf("...Loop %p done...\n",&l); 920 921 if (go.mfoptim & MFliv) 922 { 923 loopiv(l); /* induction variables */ 924 if (addblk) /* if we added a block */ 925 { 926 compdfo(); 927 goto restart; /* play it safe and start over */ 928 } 929 } 930 } /* for */ 931 freeloop(startloop); 932 startloop_cache = startloop; 933 } 934 935 /***************************** 936 * If elem is loop invariant, mark it. 937 * Input: 938 * lv = vector of all the blocks in this loop. 939 * rd = vector of loop invariants for this elem. This must be 940 * continually updated. 941 * Note that we do not iterate until no more LIs are found. The only 942 * thing this would buy us is stuff that depends on LI assignments. 943 */ 944 945 private void markinvar(elem *n,vec_t rd) 946 { 947 vec_t tmp; 948 uint i; 949 Symbol *v; 950 elem *n1; 951 952 assert(n && rd); 953 assert(vec_numbits(rd) == go.defnod.length); 954 switch (n.Eoper) 955 { 956 case OPaddass: case OPminass: case OPmulass: case OPandass: 957 case OPorass: case OPxorass: case OPdivass: case OPmodass: 958 case OPshlass: case OPshrass: case OPashrass: 959 case OPpostinc: case OPpostdec: 960 case OPcall: 961 case OPvecsto: 962 case OPcmpxchg: 963 markinvar(n.EV.E2,rd); 964 goto case OPnegass; 965 966 case OPnegass: 967 n1 = n.EV.E1; 968 if (n1.Eoper == OPind) 969 markinvar(n1.EV.E1,rd); 970 else if (OTbinary(n1.Eoper)) 971 { markinvar(n1.EV.E1,rd); 972 markinvar(n1.EV.E2,rd); 973 } 974 L2: 975 if (n.Eoper == OPcall || 976 gblock.Btry || 977 !(n1.Eoper == OPvar && 978 symbol_isintab(n1.EV.Vsym))) 979 { 980 gref = 1; 981 } 982 983 updaterd(n,rd,null); 984 break; 985 986 case OPcallns: 987 markinvar(n.EV.E2,rd); 988 markinvar(n.EV.E1,rd); 989 break; 990 991 case OPstrcpy: 992 case OPstrcat: 993 case OPmemcpy: 994 case OPmemset: 995 markinvar(n.EV.E2,rd); 996 markinvar(n.EV.E1,rd); 997 updaterd(n,rd,null); 998 break; 999 1000 case OPbtc: 1001 case OPbtr: 1002 case OPbts: 1003 markinvar(n.EV.E1,rd); 1004 markinvar(n.EV.E2,rd); 1005 updaterd(n,rd,null); 1006 break; 1007 1008 case OPucall: 1009 markinvar(n.EV.E1,rd); 1010 goto case OPasm; 1011 1012 case OPasm: 1013 gref = 1; 1014 updaterd(n,rd,null); 1015 break; 1016 1017 case OPucallns: 1018 case OPstrpar: 1019 case OPstrctor: 1020 case OPvector: 1021 case OPvoid: 1022 case OPstrlen: 1023 case OPddtor: 1024 case OPinp: 1025 case OPprefetch: // don't mark E2 1026 markinvar(n.EV.E1,rd); 1027 break; 1028 1029 case OPcond: 1030 case OPparam: 1031 case OPstrcmp: 1032 case OPmemcmp: 1033 case OPbt: // OPbt is like OPind, assume not LI 1034 case OPoutp: 1035 markinvar(n.EV.E1,rd); 1036 markinvar(n.EV.E2,rd); 1037 break; 1038 1039 case OPandand: 1040 case OPoror: 1041 markinvar(n.EV.E1,rd); 1042 tmp = vec_clone(rd); 1043 markinvar(n.EV.E2,tmp); 1044 if (el_returns(n.EV.E2)) 1045 vec_orass(rd,tmp); // rd |= tmp 1046 vec_free(tmp); 1047 break; 1048 1049 case OPcolon: 1050 case OPcolon2: 1051 tmp = vec_clone(rd); 1052 switch (el_returns(n.EV.E1) * 2 | int(el_returns(n.EV.E2))) 1053 { 1054 case 3: // E1 and E2 return 1055 markinvar(n.EV.E1,rd); 1056 markinvar(n.EV.E2,tmp); 1057 vec_orass(rd,tmp); // rd |= tmp 1058 break; 1059 case 2: // E1 returns 1060 markinvar(n.EV.E1,rd); 1061 markinvar(n.EV.E2,tmp); 1062 break; 1063 case 1: // E2 returns 1064 markinvar(n.EV.E1,tmp); 1065 markinvar(n.EV.E2,rd); 1066 break; 1067 case 0: // neither returns 1068 markinvar(n.EV.E1,tmp); 1069 vec_copy(tmp,rd); 1070 markinvar(n.EV.E2,tmp); 1071 break; 1072 default: 1073 assert(0); 1074 } 1075 vec_free(tmp); 1076 break; 1077 1078 case OPaddr: // mark addresses of OPvars as LI 1079 markinvar(n.EV.E1,rd); 1080 if (n.EV.E1.Eoper == OPvar || isLI(n.EV.E1)) 1081 makeLI(n); 1082 break; 1083 1084 case OPmsw: 1085 case OPneg: case OPbool: case OPnot: case OPcom: 1086 case OPs16_32: case OPd_s32: case OPs32_d: 1087 case OPd_s16: case OPs16_d: case OPd_f: case OPf_d: 1088 case OP32_16: case OPu8_16: 1089 case OPld_d: case OPd_ld: 1090 case OPld_u64: 1091 case OPc_r: case OPc_i: 1092 case OPu16_32: 1093 case OPu16_d: case OPd_u16: 1094 case OPs8_16: case OP16_8: 1095 case OPd_u32: case OPu32_d: 1096 1097 case OPs32_64: case OPu32_64: 1098 case OP64_32: 1099 case OPd_s64: case OPd_u64: 1100 case OPs64_d: 1101 case OPu64_d: 1102 case OP128_64: 1103 case OPs64_128: 1104 case OPu64_128: 1105 1106 case OPabs: 1107 case OPtoprec: 1108 case OPrndtol: 1109 case OPrint: 1110 case OPsetjmp: 1111 case OPbsf: 1112 case OPbsr: 1113 case OPbswap: 1114 case OPpopcnt: 1115 case OPsqrt: 1116 case OPsin: 1117 case OPcos: 1118 case OPvp_fp: /* BUG for MacHandles */ 1119 case OPnp_f16p: case OPf16p_np: case OPoffset: case OPnp_fp: 1120 case OPcvp_fp: 1121 case OPvecfill: 1122 markinvar(n.EV.E1,rd); 1123 if (isLI(n.EV.E1)) /* if child is LI */ 1124 makeLI(n); 1125 break; 1126 1127 case OPeq: 1128 case OPstreq: 1129 markinvar(n.EV.E2,rd); 1130 n1 = n.EV.E1; 1131 markinvar(n1,rd); 1132 1133 /* Determine if assignment is LI. Conditions are: */ 1134 /* 1) Rvalue is LI */ 1135 /* 2) Lvalue is a variable (simplifies things a lot) */ 1136 /* 3) Lvalue can only be affected by unambiguous defs */ 1137 /* 4) No rd's of lvalue that are within the loop (other */ 1138 /* than the current def) */ 1139 if (isLI(n.EV.E2) && n1.Eoper == OPvar) /* 1 & 2 */ 1140 { 1141 v = n1.EV.Vsym; 1142 if (v.Sflags & SFLunambig) 1143 { 1144 tmp = vec_calloc(go.defnod.length); 1145 //filterrd(tmp,rd,v); 1146 listrds(rd,n1,tmp,null); 1147 for (i = 0; (i = cast(uint) vec_index(i, tmp)) < go.defnod.length; ++i) 1148 if (go.defnod[i].DNelem != n && 1149 vec_testbit(go.defnod[i].DNblock.Bdfoidx,lv)) 1150 goto L3; 1151 makeLI(n); // then the def is LI 1152 L3: vec_free(tmp); 1153 } 1154 } 1155 goto L2; 1156 1157 case OPadd: case OPmin: case OPmul: case OPand: 1158 case OPor: case OPxor: case OPdiv: case OPmod: 1159 case OPshl: case OPshr: case OPeqeq: case OPne: 1160 case OPlt: case OPle: case OPgt: case OPge: 1161 case OPashr: 1162 case OPror: case OProl: 1163 case OPbtst: 1164 1165 case OPunord: case OPlg: case OPleg: case OPule: 1166 case OPul: case OPuge: case OPug: case OPue: 1167 case OPngt: case OPnge: case OPnlt: case OPnle: 1168 case OPord: case OPnlg: case OPnleg: case OPnule: 1169 case OPnul: case OPnuge: case OPnug: case OPnue: 1170 1171 case OPcomma: 1172 case OPpair: 1173 case OPrpair: 1174 case OPremquo: 1175 case OPscale: 1176 case OPyl2x: 1177 case OPyl2xp1: 1178 markinvar(n.EV.E1,rd); 1179 markinvar(n.EV.E2,rd); 1180 if (isLI(n.EV.E2) && isLI(n.EV.E1)) 1181 makeLI(n); 1182 break; 1183 1184 case OPind: /* must assume this is not LI */ 1185 markinvar(n.EV.E1,rd); 1186 if (isLI(n.EV.E1)) 1187 { 1188 static if (0) 1189 { 1190 // This doesn't work with C++, because exp2_ptrtocomtype() will 1191 // transfer const to where it doesn't belong. 1192 if (n.Ety & mTYconst) 1193 { 1194 makeLI(n); 1195 } 1196 1197 // This was disabled because it was marking as LI 1198 // the loop dimension for the [i] array if 1199 // a[j][i] was in a loop. This meant the a[j] array bounds 1200 // check for the a[j].length was skipped. 1201 else if (n.Ejty) 1202 { 1203 tmp = vec_calloc(go.defnod.length); 1204 filterrdind(tmp,rd,n); // only the RDs pertaining to n 1205 1206 // if (no RDs within loop) 1207 // then it's loop invariant 1208 1209 for (i = 0; (i = cast(uint) vec_index(i, tmp)) < go.defnod.length; ++i) // for each RD 1210 if (vec_testbit(go.defnod[i].DNblock.Bdfoidx,lv)) 1211 goto L10; // found a RD in the loop 1212 1213 // If gref has occurred, this can still be LI 1214 // if n is an AE that was also an AE at the 1215 // point of gref. 1216 // We can catch a subset of these cases by looking 1217 // at the AEs at the start of the loop. 1218 if (gref) 1219 { 1220 int j; 1221 1222 //printf("\tn is: "); WReqn(n); printf("\n"); 1223 for (j = 0; (j = cast(uint) vec_index(j, gin)) < go.exptop; ++j) 1224 { 1225 elem *e = go.expnod[j]; 1226 1227 //printf("\t\tgo.expnod[%d] = %p\n",j,e); 1228 //printf("\t\tAE is: "); WReqn(e); printf("\n"); 1229 if (el_match2(n,e)) 1230 { 1231 makeLI(n); 1232 //printf("Ind LI: "); WReqn(n); printf("\n"); 1233 break; 1234 } 1235 } 1236 } 1237 else 1238 makeLI(n); 1239 L10: 1240 vec_free(tmp); 1241 break; 1242 } 1243 } 1244 } 1245 break; 1246 1247 case OPvar: 1248 v = n.EV.Vsym; 1249 if (v.Sflags & SFLunambig) // must be unambiguous to be LI 1250 { 1251 tmp = vec_calloc(go.defnod.length); 1252 //filterrd(tmp,rd,v); // only the RDs pertaining to v 1253 listrds(rd,n,tmp,null); // only the RDs pertaining to v 1254 1255 // if (no RDs within loop) 1256 // then it's loop invariant 1257 1258 for (i = 0; (i = cast(uint) vec_index(i, tmp)) < go.defnod.length; ++i) // for each RD 1259 if (vec_testbit(go.defnod[i].DNblock.Bdfoidx,lv)) 1260 goto L1; // found a RD in the loop 1261 makeLI(n); 1262 1263 L1: vec_free(tmp); 1264 } 1265 break; 1266 1267 case OPstring: 1268 case OPrelconst: 1269 case OPconst: /* constants are always LI */ 1270 case OPframeptr: 1271 makeLI(n); 1272 break; 1273 1274 case OPinfo: 1275 markinvar(n.EV.E2,rd); 1276 break; 1277 1278 case OPstrthis: 1279 case OPmark: 1280 case OPctor: 1281 case OPdtor: 1282 case OPdctor: 1283 case OPhalt: 1284 case OPgot: // shouldn't OPgot be makeLI ? 1285 break; 1286 1287 default: 1288 WROP(n.Eoper); 1289 //printf("n.Eoper = %d, OPconst = %d\n", n.Eoper, OPconst); 1290 assert(0); 1291 } 1292 1293 debug 1294 { 1295 if (debugc && isLI(n)) 1296 { 1297 printf(" LI elem: "); 1298 WReqn(n); 1299 printf("\n"); 1300 } 1301 } 1302 } 1303 1304 /************************************** 1305 * Fill in the DefNode.DNumambig vector. 1306 * Set bits defnod[] indices for entries 1307 * which are completely destroyed when e is 1308 * unambiguously assigned to. 1309 * Params: 1310 * v = vector to fill in 1311 * e = defnod[] entry that is an assignment to a variable 1312 */ 1313 1314 extern (C) 1315 { 1316 void fillInDNunambig(vec_t v, elem *e) 1317 { 1318 assert(OTassign(e.Eoper)); 1319 elem *t = e.EV.E1; 1320 assert(t.Eoper == OPvar); 1321 Symbol *d = t.EV.Vsym; 1322 1323 targ_size_t toff = t.EV.Voffset; 1324 targ_size_t tsize = (e.Eoper == OPstreq) ? type_size(e.ET) : tysize(t.Ety); 1325 targ_size_t ttop = toff + tsize; 1326 1327 //printf("updaterd: "); WReqn(n); printf(" toff=%d, tsize=%d\n", toff, tsize); 1328 1329 1330 // for all unambig defs in go.defnod[] 1331 foreach (const i; 0 .. go.defnod.length) 1332 { 1333 elem *tn = go.defnod[i].DNelem; 1334 elem *tn1; 1335 targ_size_t tn1size; 1336 1337 if (!OTassign(tn.Eoper)) 1338 continue; 1339 1340 // If def of same variable, kill that def 1341 tn1 = tn.EV.E1; 1342 if (tn1.Eoper != OPvar || d != tn1.EV.Vsym) 1343 continue; 1344 1345 // If t completely overlaps tn1 1346 tn1size = (tn.Eoper == OPstreq) 1347 ? type_size(tn.ET) : tysize(tn1.Ety); 1348 if (toff <= tn1.EV.Voffset && 1349 tn1.EV.Voffset + tn1size <= ttop) 1350 { 1351 vec_setbit(cast(uint)i, v); 1352 } 1353 } 1354 } 1355 } 1356 1357 /******************** 1358 * Update rd vector. 1359 * Input: 1360 * n assignment elem or function call elem or OPasm elem 1361 * rd reaching def vector to update 1362 * (clear bits for defs we kill, set bit for n (which is the 1363 * def we are genning)) 1364 * vecdim go.defnod.length 1365 */ 1366 1367 extern (C) { 1368 void updaterd(elem *n,vec_t GEN,vec_t KILL) 1369 { 1370 const op = n.Eoper; 1371 elem *t; 1372 1373 assert(OTdef(op)); 1374 assert(GEN); 1375 elem_debug(n); 1376 1377 uint ni = n.Edef; 1378 assert(ni != -1); 1379 1380 // If unambiguous def 1381 if (OTassign(op) && (t = n.EV.E1).Eoper == OPvar) 1382 { 1383 vec_t v = go.defnod[ni].DNunambig; 1384 assert(v); 1385 if (KILL) 1386 vec_orass(KILL, v); 1387 vec_subass(GEN, v); 1388 } 1389 else 1390 { 1391 static if (0) 1392 { 1393 if (OTassign(op) && t.Eoper != OPvar && t.Ejty) 1394 { 1395 // for all unambig defs in go.defnod[] 1396 foreach (uint i; 0 .. go.defnod.length) 1397 { 1398 elem *tn = go.defnod[i].DNelem; 1399 elem *tn1; 1400 1401 if (tn == n) 1402 ni = i; 1403 1404 if (!OTassign(tn.Eoper)) 1405 continue; 1406 1407 // If def of same variable, kill that def 1408 tn1 = tn.EV.E1; 1409 if (tn1.Eoper != OPind || t.Ejty != tn1.Ejty) 1410 continue; 1411 1412 if (KILL) 1413 vec_setbit(i,KILL); 1414 vec_clearbit(i,GEN); 1415 } 1416 } 1417 } 1418 } 1419 1420 vec_setbit(ni,GEN); // set bit in GEN for this def 1421 } 1422 } 1423 1424 /*************************** 1425 * Mark all elems as not being loop invariant. 1426 */ 1427 1428 private void unmarkall(elem *e) 1429 { 1430 for (; 1; e = e.EV.E1) 1431 { 1432 assert(e); 1433 e.Nflags &= ~NFLli; /* unmark this elem */ 1434 if (OTunary(e.Eoper)) 1435 continue; 1436 else if (OTbinary(e.Eoper)) 1437 { 1438 unmarkall(e.EV.E2); 1439 continue; 1440 } 1441 return; 1442 } 1443 } 1444 1445 1446 /******************************** 1447 * Search for references to v in tree n before nstop is encountered. 1448 * Params: 1449 * v = symbol to search for 1450 * n = tree to search 1451 * nstop = stop searching tree when reaching this elem 1452 * Returns: 1453 * true if there are any refs of v in n before nstop is encountered 1454 */ 1455 1456 private bool refs(Symbol *v,elem *n,elem *nstop) 1457 { 1458 symbol_debug(v); 1459 assert(symbol_isintab(v)); 1460 assert(v.Ssymnum < globsym.length); 1461 bool stop = false; 1462 1463 // Walk tree in evaluation order 1464 bool walk(elem* n) 1465 { 1466 elem_debug(n); 1467 assert(n); 1468 1469 if (stop) 1470 return false; 1471 bool f = false; 1472 const op = n.Eoper; 1473 if (OTunary(op)) 1474 f = walk(n.EV.E1); 1475 else if (OTbinary(op)) 1476 { 1477 if (ERTOL(n)) /* watch order of evaluation */ 1478 { 1479 /* Note that (OPvar = e) is not a ref of OPvar, whereas */ 1480 /* ((OPbit OPvar) = e) is a ref of OPvar, and (OPvar op= e) is */ 1481 /* a ref of OPvar, etc. */ 1482 f = walk(n.EV.E2); 1483 if (!f) 1484 { 1485 if (op == OPeq) 1486 { 1487 if (n.EV.E1.Eoper != OPvar) 1488 f = walk(n.EV.E1.EV.E1); 1489 } 1490 else 1491 f = walk(n.EV.E1); 1492 } 1493 } 1494 else 1495 f = walk(n.EV.E1) || walk(n.EV.E2); 1496 } 1497 1498 if (n == nstop) 1499 stop = true; 1500 else if (n.Eoper == OPvar) /* if variable reference */ 1501 return v == n.EV.Vsym; 1502 else if (op == OPasm) /* everything is referenced */ 1503 return true; 1504 return f; 1505 } 1506 1507 return walk(n); 1508 } 1509 1510 /************************* 1511 * Move LIs to preheader. 1512 * Conditions to be satisfied for code motion are: 1513 * 1) All exit blocks are dominated (true before this is called). 1514 * -- OR -- 1515 * 2) Variable assigned by a statement is not live on entering 1516 * any successor outside the loop of any exit block of the 1517 * loop. 1518 * 1519 * 3) Cannot move assignment to variable if there are any other 1520 * assignments to that variable within the loop (true or 1521 * assignment would not have been marked LI). 1522 * 4) Cannot move assignments to a variable if there is a use 1523 * of that variable in this loop that is reached by any other 1524 * def of it. 1525 * 5) Cannot move expressions that have side effects. 1526 * 6) Do not move assignments to variables that could be affected 1527 * by ambiguous defs. 1528 * 7) It is not worth it to move expressions of the form: 1529 * (var == const) 1530 * Input: 1531 * n the elem we're considering moving 1532 * b the block this elem is in 1533 * l the loop we're in 1534 * domexit flags 1535 * bit 0: 1 this branch is always executed 1536 * 0 this branch is only sometimes executed 1537 * bit 1: 1 do not move LIs that could throw exceptions 1538 * or cannot be moved past possibly thrown exceptions 1539 * Returns: 1540 * revised domexit 1541 */ 1542 1543 private void movelis(elem* n, block* b, ref loop l, ref uint pdomexit) 1544 { 1545 vec_t tmp; 1546 elem *ne; 1547 elem *t; 1548 elem *n2; 1549 Symbol *v; 1550 tym_t ty; 1551 1552 Lnextlis: 1553 //if (isLI(n)) { printf("movelis(B%d, ", b.Bdfoidx); WReqn(n); printf(")\n"); } 1554 assert(n); 1555 elem_debug(n); 1556 const op = n.Eoper; 1557 switch (op) 1558 { 1559 case OPvar: 1560 case OPconst: 1561 case OPrelconst: 1562 goto Lret; 1563 1564 case OPandand: 1565 case OPoror: 1566 case OPcond: 1567 { 1568 uint domexit; 1569 1570 movelis(n.EV.E1,b,l,pdomexit); // always executed 1571 domexit = pdomexit & ~1; // sometimes executed 1572 movelis(n.EV.E2,b,l,domexit); 1573 pdomexit |= domexit & 2; 1574 goto Lret; 1575 } 1576 1577 case OPeq: 1578 // Do loop invariant assignments 1579 if (isLI(n) && n.EV.E1.Eoper == OPvar) 1580 { 1581 v = n.EV.E1.EV.Vsym; // variable index number 1582 1583 if (!(v.Sflags & SFLunambig)) goto L3; // case 6 1584 1585 // If case 4 is not satisfied, return 1586 1587 // Function parameters have an implied definition prior to the 1588 // first block of the function. Unfortunately, the rd vector 1589 // does not take this into account. Therefore, we assume the 1590 // worst and reject assignments to function parameters. 1591 if (v.Sclass == SCparameter || v.Sclass == SCregpar || 1592 v.Sclass == SCfastpar || v.Sclass == SCshadowreg) 1593 goto L3; 1594 1595 if (el_sideeffect(n.EV.E2)) goto L3; // case 5 1596 1597 // If case 1 or case 2 is not satisfied, return 1598 1599 if (!(pdomexit & 1)) // if not case 1 1600 { 1601 uint i; 1602 for (i = 0; (i = cast(uint) vec_index(i, l.Lexit)) < dfo.length; ++i) // for each exit block 1603 { 1604 foreach (bl; ListRange(dfo[i].Bsucc)) 1605 { 1606 block *s; // successor to exit block 1607 1608 s = list_block(bl); 1609 if (!vec_testbit(s.Bdfoidx,l.Lloop) && 1610 (!symbol_isintab(v) || 1611 vec_testbit(v.Ssymnum,s.Binlv))) // if v is live on exit 1612 goto L3; 1613 } 1614 } 1615 } 1616 1617 tmp = vec_calloc(go.defnod.length); 1618 uint i; 1619 for (i = 0; (i = cast(uint) vec_index(i, l.Lloop)) < dfo.length; ++i) // for each block in loop 1620 { 1621 if (dfo[i] == b) // except this one 1622 continue; 1623 1624 //<if there are any RDs of v in Binrd other than n> 1625 // <if there are any refs of v in that block> 1626 // return; 1627 1628 //filterrd(tmp,dfo[i].Binrd,v); 1629 listrds(dfo[i].Binrd,n.EV.E1,tmp,null); 1630 uint j; 1631 for (j = 0; (j = cast(uint) vec_index(j, tmp)) < go.defnod.length; ++j) // for each RD of v in Binrd 1632 { 1633 if (go.defnod[j].DNelem == n) 1634 continue; 1635 if (dfo[i].Belem && 1636 refs(v,dfo[i].Belem,cast(elem *)null)) //if refs of v 1637 { 1638 vec_free(tmp); 1639 goto L3; 1640 } 1641 break; 1642 } 1643 } // foreach 1644 1645 // <if there are any RDs of v in b.Binrd other than n> 1646 // <if there are any references to v before the 1647 // assignment to v> 1648 // <can't move this assignment> 1649 1650 //filterrd(tmp,b.Binrd,v); 1651 listrds(b.Binrd,n.EV.E1,tmp,null); 1652 uint j; 1653 for (j = 0; (j = cast(uint) vec_index(j, tmp)) < go.defnod.length; ++j) // for each RD of v in Binrd 1654 { 1655 if (go.defnod[j].DNelem == n) 1656 continue; 1657 if (b.Belem && refs(v,b.Belem,n)) 1658 { 1659 vec_free(tmp); 1660 goto L3; // can't move it 1661 } 1662 break; // avoid redundant looping 1663 } 1664 vec_free(tmp); 1665 1666 // We have an LI assignment, n. 1667 // Check to see if the rvalue is already in the preheader. 1668 foreach (e; l.Llis) 1669 { 1670 if (el_match(n.EV.E2, e.EV.E2)) 1671 { 1672 el_free(n.EV.E2); 1673 n.EV.E2 = el_calloc(); 1674 el_copy(n.EV.E2, e.EV.E1); 1675 if (debugc) printf("LI assignment rvalue was replaced\n"); 1676 doflow = true; 1677 go.changes++; 1678 break; 1679 } 1680 } 1681 1682 // move assignment elem to preheader 1683 if (debugc) printf("Moved LI assignment "); 1684 1685 debug 1686 if (debugc) 1687 { 1688 WReqn(n); 1689 printf(";\n"); 1690 } 1691 1692 go.changes++; 1693 doflow = true; // redo flow analysis 1694 ne = el_calloc(); 1695 el_copy(ne,n); // create assignment elem 1696 assert(l.Lpreheader); // make sure there is one 1697 appendelem(ne,&(l.Lpreheader.Belem)); // append ne to preheader 1698 l.Llis.push(ne); 1699 1700 el_copy(n,ne.EV.E1); // replace n with just a reference to v 1701 goto Lret; 1702 } // if 1703 break; 1704 1705 case OPcall: 1706 case OPucall: 1707 pdomexit |= 2; 1708 break; 1709 1710 case OPpair: 1711 case OPrpair: // don't move these, as they do not do computation 1712 movelis(n.EV.E1,b,l,pdomexit); 1713 n = n.EV.E2; 1714 goto Lnextlis; 1715 1716 default: 1717 break; 1718 } 1719 1720 L3: 1721 // Do leaves of non-LI expressions, leaves of = elems that didn't 1722 // meet the invariant assignment removal criteria, and don't do leaves 1723 if (OTleaf(op)) 1724 goto Lret; 1725 if (!isLI(n) || op == OPeq || op == OPcomma || OTrel(op) || op == OPnot || 1726 // These are usually addressing modes, so moving them is a net loss 1727 (I32 && op == OPshl && n.EV.E2.Eoper == OPconst && el_tolong(n.EV.E2) <= 3UL) 1728 ) 1729 { 1730 if (OTassign(op)) 1731 { 1732 elem *n1 = n.EV.E1; 1733 elem *n11; 1734 1735 if (OTbinary(op)) 1736 movelis(n.EV.E2,b,l,pdomexit); 1737 1738 // Do lvalue only if it is an expression 1739 if (n1.Eoper == OPvar) 1740 goto Lret; 1741 n11 = n1.EV.E1; 1742 if (OTbinary(n1.Eoper)) 1743 { 1744 movelis(n11,b,l,pdomexit); 1745 n = n1.EV.E2; 1746 } 1747 // If *(x + c), just make x the LI, not the (x + c). 1748 // The +c comes free with the addressing mode. 1749 else if (n1.Eoper == OPind && 1750 isLI(n11) && 1751 n11.Eoper == OPadd && 1752 n11.EV.E2.Eoper == OPconst 1753 ) 1754 { 1755 n = n11.EV.E1; 1756 } 1757 else 1758 n = n11; 1759 movelis(n,b,l,pdomexit); 1760 if (b.Btry || !(n1.Eoper == OPvar && symbol_isintab(n1.EV.Vsym))) 1761 { 1762 //printf("assign to global => domexit |= 2\n"); 1763 pdomexit |= 2; 1764 } 1765 } 1766 else if (OTunary(op)) 1767 { 1768 elem *e1 = n.EV.E1; 1769 1770 // If *(x + c), just make x the LI, not the (x + c). 1771 // The +c comes free with the addressing mode. 1772 if (op == OPind && 1773 isLI(e1) && 1774 e1.Eoper == OPadd && 1775 e1.EV.E2.Eoper == OPconst 1776 ) 1777 { 1778 n = e1.EV.E1; 1779 } 1780 else 1781 n = e1; 1782 } 1783 else if (OTbinary(op)) 1784 { 1785 movelis(n.EV.E1,b,l,pdomexit); 1786 n = n.EV.E2; 1787 } 1788 goto Lnextlis; 1789 } 1790 1791 if (el_sideeffect(n)) 1792 goto Lret; 1793 1794 static if (0) 1795 { 1796 printf("*pdomexit = %u\n", pdomexit); 1797 if (pdomexit & 2) 1798 { 1799 // If any indirections, can't LI it 1800 1801 // If this operand has already been indirected, we can let 1802 // it pass. 1803 Symbol *s; 1804 1805 printf("looking at:\n"); 1806 elem_print(n); 1807 s = el_basesym(n.EV.E1); 1808 if (s) 1809 { 1810 foreach (el; l.Llis) 1811 { 1812 el = el.EV.E2; 1813 elem_print(el); 1814 if (el.Eoper == OPind && el_basesym(el.EV.E1) == s) 1815 { 1816 printf(" pass!\n"); 1817 goto Lpass; 1818 } 1819 } 1820 } 1821 printf(" skip!\n"); 1822 goto Lret; 1823 1824 Lpass: 1825 } 1826 } 1827 1828 // Move the LI expression to the preheader 1829 if (debugc) printf("Moved LI expression "); 1830 1831 debug 1832 if (debugc) 1833 { 1834 WReqn(n); 1835 printf(";\n"); 1836 } 1837 1838 // See if it's already been moved 1839 ty = n.Ety; 1840 foreach (el; l.Llis) 1841 { 1842 tym_t ty2; 1843 1844 //printf("existing LI: "); WReqn(el); printf("\n"); 1845 ty2 = el.EV.E2.Ety; 1846 if (tysize(ty) == tysize(ty2)) 1847 { 1848 el.EV.E2.Ety = ty; 1849 if (el_match(n,el.EV.E2)) 1850 { 1851 el.EV.E2.Ety = ty2; 1852 if (!OTleaf(n.Eoper)) 1853 { el_free(n.EV.E1); 1854 if (OTbinary(n.Eoper)) 1855 el_free(n.EV.E2); 1856 } 1857 el_copy(n,el.EV.E1); // make copy of temp 1858 n.Ety = ty; 1859 1860 debug 1861 if (debugc) 1862 { printf("Already moved: LI expression replaced with "); 1863 WReqn(n); 1864 printf("\nPreheader %d expression %p ", 1865 l.Lpreheader.Bdfoidx,l.Lpreheader.Belem); 1866 WReqn(l.Lpreheader.Belem); 1867 printf("\n"); 1868 } 1869 1870 go.changes++; 1871 doflow = true; // redo flow analysis 1872 goto Lret; 1873 } 1874 el.EV.E2.Ety = ty2; 1875 } 1876 } 1877 1878 if (!(pdomexit & 1)) // if only sometimes executed 1879 { 1880 if (debugc) printf(" doesn't dominate exit\n"); 1881 goto Lret; // don't move LI 1882 } 1883 1884 if (tyaggregate(n.Ety)) 1885 goto Lret; 1886 1887 go.changes++; 1888 doflow = true; // redo flow analysis 1889 1890 t = el_alloctmp(n.Ety); /* allocate temporary t */ 1891 1892 debug 1893 { 1894 if (debugc) printf("movelis() introduced new variable '%s' of type ",t.EV.Vsym.Sident.ptr); 1895 if (debugc) WRTYxx(t.Ety); 1896 if (debugc) printf("\n"); 1897 } 1898 1899 n2 = el_calloc(); 1900 el_copy(n2,n); /* create copy n2 of n */ 1901 ne = el_bin(OPeq,t.Ety,t,n2); /* create elem t=n2 */ 1902 assert(l.Lpreheader); /* make sure there is one */ 1903 appendelem(ne,&(l.Lpreheader.Belem)); /* append ne to preheader */ 1904 1905 debug 1906 if (debugc) 1907 { 1908 printf("Preheader %d expression %p\n\t", 1909 l.Lpreheader.Bdfoidx,l.Lpreheader.Belem); 1910 WReqn(l.Lpreheader.Belem); 1911 printf("\nLI expression replaced with "); WReqn(t); 1912 printf("\n"); 1913 } 1914 1915 el_copy(n,t); /* replace this elem with t */ 1916 1917 // Remember LI expression in elem list 1918 l.Llis.push(ne); 1919 1920 Lret: 1921 1922 } 1923 1924 /*************************** 1925 * Append elem to existing elem using an OPcomma elem. 1926 * Input: 1927 * n elem to append 1928 * *pn elem to append to 1929 */ 1930 1931 private void appendelem(elem *n,elem **pn) 1932 { 1933 assert(n && pn); 1934 if (*pn) /* if this elem exists */ 1935 { 1936 while ((*pn).Eoper == OPcomma) /* while we see OPcomma elems */ 1937 { 1938 (*pn).Ety = n.Ety; 1939 pn = &((*pn).EV.E2); /* cruise down right side */ 1940 } 1941 *pn = el_bin(OPcomma,n.Ety,*pn,n); 1942 } 1943 else 1944 *pn = n; /* else create a elem */ 1945 } 1946 1947 /************** LOOP INDUCTION VARIABLES **********************/ 1948 1949 /**************************** 1950 * Create a new famlist entry. 1951 */ 1952 1953 private void newfamlist(famlist* fl, tym_t ty) 1954 { 1955 eve c = void; 1956 memset(&c,0,c.sizeof); 1957 1958 fl.FLty = ty; 1959 switch (tybasic(ty)) 1960 { case TYfloat: 1961 c.Vfloat = 1; 1962 break; 1963 1964 case TYdouble: 1965 case TYdouble_alias: 1966 c.Vdouble = 1; 1967 break; 1968 1969 case TYldouble: 1970 c.Vldouble = 1; 1971 break; 1972 1973 case TYbool: 1974 case TYchar: 1975 case TYschar: 1976 case TYuchar: 1977 c.Vchar = 1; 1978 break; 1979 1980 case TYshort: 1981 case TYushort: 1982 case TYchar16: 1983 case TYwchar_t: // BUG: what about 4 byte wchar_t's? 1984 c.Vshort = 1; 1985 break; 1986 1987 case TYsptr: 1988 case TYcptr: 1989 case TYfptr: 1990 case TYvptr: 1991 case TYnptr: 1992 case TYnullptr: 1993 case TYimmutPtr: 1994 case TYsharePtr: 1995 case TYrestrictPtr: 1996 case TYfgPtr: 1997 ty = TYint; 1998 if (I64) 1999 ty = TYllong; 2000 goto case TYuint; 2001 2002 case TYint: 2003 case TYuint: 2004 c.Vint = 1; 2005 break; 2006 2007 case TYhptr: 2008 ty = TYlong; 2009 goto case TYlong; 2010 2011 case TYlong: 2012 case TYulong: 2013 case TYdchar: 2014 default: 2015 c.Vlong = 1; 2016 break; 2017 } 2018 fl.c1 = el_const(ty,&c); /* c1 = 1 */ 2019 c.Vldouble = 0; 2020 if (typtr(ty)) 2021 { 2022 ty = TYint; 2023 if (tybasic(ty) == TYhptr) 2024 ty = TYlong; 2025 if (I64) 2026 ty = TYllong; 2027 } 2028 fl.c2 = el_const(ty,&c); /* c2 = 0 */ 2029 } 2030 2031 /*************************** 2032 * Remove induction variables from loop l. 2033 * Loop invariant removal should have been done just previously. 2034 */ 2035 2036 private void loopiv(ref loop l) 2037 { 2038 if (debugc) printf("loopiv(%p)\n", &l); 2039 assert(l.Livlist.length == 0 && l.Lopeqlist.length == 0); 2040 elimspec(l); 2041 if (doflow) 2042 { 2043 flowrd(); /* compute reaching defs */ 2044 flowlv(); /* compute live variables */ 2045 flowae(); // compute available expressions 2046 doflow = false; 2047 } 2048 findbasivs(l); /* find basic induction variables */ 2049 findopeqs(l); // find op= variables 2050 findivfams(l); /* find IV families */ 2051 elimfrivivs(l); /* eliminate less useful family IVs */ 2052 intronvars(l); /* introduce new variables */ 2053 elimbasivs(l); /* eliminate basic IVs */ 2054 if (!addblk) // adding a block changes the Binlv 2055 elimopeqs(l); // eliminate op= variables 2056 2057 foreach (ref iv; l.Livlist) 2058 iv.reset(); 2059 l.Livlist.reset(); // reset for reuse 2060 2061 foreach (ref iv; l.Lopeqlist) 2062 iv.reset(); 2063 l.Lopeqlist.reset(); // reset for reuse 2064 2065 /* Do copy propagation and dead assignment elimination */ 2066 /* upon return to optfunc() */ 2067 } 2068 2069 /************************************* 2070 * Find basic IVs of loop l. 2071 * A basic IV x of loop l is a variable x which has 2072 * exactly one assignment within l of the form: 2073 * x += c or x -= c, where c is either a constant 2074 * or a LI. 2075 * Input: 2076 * go.defnod[] loaded with all the definition elems of the loop 2077 */ 2078 2079 private void findbasivs(ref loop l) 2080 { 2081 vec_t poss,notposs; 2082 elem *n; 2083 bool ambdone; 2084 2085 ambdone = false; 2086 poss = vec_calloc(globsym.length); 2087 notposs = vec_calloc(globsym.length); /* vector of all variables */ 2088 /* (initially all unmarked) */ 2089 2090 /* for each def in go.defnod[] that is within loop l */ 2091 2092 foreach (const i; 0 .. go.defnod.length) 2093 { 2094 if (!vec_testbit(go.defnod[i].DNblock.Bdfoidx,l.Lloop)) 2095 continue; /* def is not in the loop */ 2096 2097 n = go.defnod[i].DNelem; 2098 elem_debug(n); 2099 if (OTassign(n.Eoper) && n.EV.E1.Eoper == OPvar) 2100 { 2101 Symbol *s; /* if unambiguous def */ 2102 2103 s = n.EV.E1.EV.Vsym; 2104 if (symbol_isintab(s)) 2105 { 2106 SYMIDX v; 2107 2108 v = n.EV.E1.EV.Vsym.Ssymnum; 2109 if ((n.Eoper == OPaddass || n.Eoper == OPminass || 2110 n.Eoper == OPpostinc || n.Eoper == OPpostdec) && 2111 (n.EV.E2.Eoper == OPconst || /* if x += c or x -= c */ 2112 n.EV.E2.Eoper == OPvar && isLI(n.EV.E2))) 2113 { 2114 if (vec_testbit(v,poss)) 2115 /* We've already seen this def elem, */ 2116 /* therefore there is more than one */ 2117 /* def of v within the loop, therefore */ 2118 /* v is not a basic IV. */ 2119 vec_setbit(v,notposs); 2120 else 2121 vec_setbit(v,poss); 2122 } 2123 else /* else mark as not possible */ 2124 vec_setbit(v,notposs); 2125 } 2126 } 2127 else /* else ambiguous def */ 2128 { 2129 /* mark any vars that could be affected by */ 2130 /* this def as not possible */ 2131 2132 if (!ambdone) /* avoid redundant loops */ 2133 { 2134 foreach (j; 0 .. globsym.length) 2135 { 2136 if (!(globsym[j].Sflags & SFLunambig)) 2137 vec_setbit(j,notposs); 2138 } 2139 ambdone = true; 2140 } 2141 } 2142 } 2143 static if (0) 2144 { 2145 printf("poss "); vec_println(poss); 2146 printf("notposs "); vec_println(notposs); 2147 } 2148 vec_subass(poss,notposs); /* poss = poss - notposs */ 2149 2150 /* create list of IVs */ 2151 uint i; 2152 for (i = 0; (i = cast(uint) vec_index(i, poss)) < globsym.length; ++i) // for each basic IV 2153 { 2154 Symbol *s; 2155 2156 /* Skip if we don't want it to be a basic IV (see funcprev()) */ 2157 s = globsym[i]; 2158 assert(symbol_isintab(s)); 2159 if (s.Sflags & SFLnotbasiciv) 2160 continue; 2161 2162 // Do not use aggregates as basic IVs. This is because the other loop 2163 // code doesn't check offsets into symbols, (assuming everything 2164 // is at offset 0). We could perhaps amend this by allowing basic IVs 2165 // if the struct consists of only one data member. 2166 if (tyaggregate(s.ty())) 2167 continue; 2168 2169 // Do not use complex types as basic IVs, as the code gen isn't up to it 2170 if (tycomplex(s.ty())) 2171 continue; 2172 2173 auto biv = l.Livlist.push(); 2174 biv.IVbasic = s; // symbol of basic IV 2175 biv.IVincr = null; 2176 2177 if (debugc) printf("Symbol '%s' (%d) is a basic IV, ", s.Sident.ptr, i); 2178 2179 /* We have the sym idx of the basic IV. We need to find */ 2180 /* the parent of the increment elem for it. */ 2181 2182 /* First find the go.defnod[] */ 2183 foreach (j; 0 .. go.defnod.length) 2184 { 2185 /* If go.defnod is a def of i and it is in the loop */ 2186 if (go.defnod[j].DNelem.EV.E1 && /* OPasm are def nodes */ 2187 go.defnod[j].DNelem.EV.E1.EV.Vsym == s && 2188 vec_testbit(go.defnod[j].DNblock.Bdfoidx,l.Lloop)) 2189 { 2190 biv.IVincr = el_parent(go.defnod[j].DNelem,&(go.defnod[j].DNblock.Belem)); 2191 assert(s == (*biv.IVincr).EV.E1.EV.Vsym); 2192 2193 debug if (debugc) 2194 { 2195 printf("Increment elem is: "); WReqn(*biv.IVincr); printf("\n"); 2196 } 2197 goto L1; 2198 } 2199 } 2200 assert(0); /* should have found it */ 2201 /* NOTREACHED */ 2202 2203 L1: 2204 } 2205 2206 vec_free(poss); 2207 vec_free(notposs); 2208 } 2209 2210 /************************************* 2211 * Find op= elems of loop l. 2212 * Analogous to findbasivs(). 2213 * Used to eliminate useless loop code normally found in benchmark programs. 2214 * Input: 2215 * go.defnod[] loaded with all the definition elems of the loop 2216 */ 2217 2218 private void findopeqs(ref loop l) 2219 { 2220 vec_t poss,notposs; 2221 elem *n; 2222 bool ambdone; 2223 2224 ambdone = false; 2225 poss = vec_calloc(globsym.length); 2226 notposs = vec_calloc(globsym.length); // vector of all variables 2227 // (initially all unmarked) 2228 2229 // for each def in go.defnod[] that is within loop l 2230 2231 foreach (i; 0 .. go.defnod.length) 2232 { 2233 if (!vec_testbit(go.defnod[i].DNblock.Bdfoidx,l.Lloop)) 2234 continue; // def is not in the loop 2235 2236 n = go.defnod[i].DNelem; 2237 elem_debug(n); 2238 if (OTopeq(n.Eoper) && n.EV.E1.Eoper == OPvar) 2239 { 2240 Symbol *s; // if unambiguous def 2241 2242 s = n.EV.E1.EV.Vsym; 2243 if (symbol_isintab(s)) 2244 { 2245 SYMIDX v; 2246 2247 v = n.EV.E1.EV.Vsym.Ssymnum; 2248 { 2249 if (vec_testbit(v,poss)) 2250 // We've already seen this def elem, 2251 // therefore there is more than one 2252 // def of v within the loop, therefore 2253 // v is not a basic IV. 2254 vec_setbit(v,notposs); 2255 else 2256 vec_setbit(v,poss); 2257 } 2258 } 2259 } 2260 else // else ambiguous def 2261 { 2262 // mark any vars that could be affected by 2263 // this def as not possible 2264 2265 if (!ambdone) // avoid redundant loops 2266 { 2267 foreach (j; 0 .. globsym.length) 2268 { 2269 if (!(globsym[j].Sflags & SFLunambig)) 2270 vec_setbit(j,notposs); 2271 } 2272 ambdone = true; 2273 } 2274 } 2275 } 2276 2277 // Don't use symbols already in Livlist 2278 foreach (ref iv; l.Livlist) 2279 { 2280 vec_setbit(iv.IVbasic.Ssymnum,notposs); 2281 } 2282 2283 2284 static if (0) 2285 { 2286 printf("poss "); vec_println(poss); 2287 printf("notposs "); vec_println(notposs); 2288 } 2289 2290 vec_subass(poss,notposs); // poss = poss - notposs 2291 2292 // create list of IVs 2293 uint i; 2294 for (i = 0; (i = cast(uint) vec_index(i, poss)) < globsym.length; ++i) // for each opeq IV 2295 { 2296 Symbol *s; 2297 2298 s = globsym[i]; 2299 assert(symbol_isintab(s)); 2300 2301 // Do not use aggregates as basic IVs. This is because the other loop 2302 // code doesn't check offsets into symbols, (assuming everything 2303 // is at offset 0). We could perhaps amend this by allowing basic IVs 2304 // if the struct consists of only one data member. 2305 if (tyaggregate(s.ty())) 2306 continue; 2307 2308 auto biv = l.Lopeqlist.push(); 2309 biv.IVbasic = s; // symbol of basic IV 2310 biv.IVincr = null; 2311 2312 if (debugc) printf("Symbol '%s' (%d) is an opeq IV, ",s.Sident.ptr,i); 2313 2314 // We have the sym idx of the basic IV. We need to find 2315 // the parent of the increment elem for it. 2316 2317 // First find the go.defnod[] 2318 foreach (j; 0 .. go.defnod.length) 2319 { 2320 // If go.defnod is a def of i and it is in the loop 2321 if (go.defnod[j].DNelem.EV.E1 && // OPasm are def nodes 2322 go.defnod[j].DNelem.EV.E1.EV.Vsym == s && 2323 vec_testbit(go.defnod[j].DNblock.Bdfoidx,l.Lloop)) 2324 { 2325 biv.IVincr = el_parent(go.defnod[j].DNelem,&(go.defnod[j].DNblock.Belem)); 2326 assert(s == (*biv.IVincr).EV.E1.EV.Vsym); 2327 2328 debug if (debugc) 2329 { 2330 printf("Opeq elem is: "); WReqn(*biv.IVincr); printf("\n"); 2331 } 2332 goto Lcont; 2333 } 2334 } 2335 assert(0); // should have found it 2336 // NOTREACHED 2337 2338 Lcont: 2339 } 2340 2341 vec_free(poss); 2342 vec_free(notposs); 2343 } 2344 2345 /***************************** 2346 * Find families for each basic IV. 2347 * An IV family is a list of elems of the form 2348 * c1*X+c2, where X is a basic induction variable. 2349 * Note that we do not do divides, because of roundoff error problems. 2350 */ 2351 2352 private void findivfams(ref loop l) 2353 { 2354 if (debugc) printf("findivfams(%p)\n", &l); 2355 foreach (ref biv; l.Livlist) 2356 { 2357 for (uint i = 0; (i = cast(uint) vec_index(i, l.Lloop)) < dfo.length; ++i) // for each block in loop 2358 if (dfo[i].Belem) 2359 ivfamelems(&biv,&(dfo[i].Belem)); 2360 /* Fold all the constant expressions in c1 and c2. */ 2361 foreach (ref fl; biv.IVfamily) 2362 { 2363 fl.c1 = doptelem(fl.c1,GOALvalue | GOALagain); 2364 fl.c2 = doptelem(fl.c2,GOALvalue | GOALagain); 2365 } 2366 } 2367 } 2368 2369 /************************* 2370 * Tree walking support routine for findivfams(). 2371 * biv = basic induction variable pointer 2372 * pn pointer to elem 2373 */ 2374 2375 private void ivfamelems(Iv *biv,elem **pn) 2376 { 2377 tym_t ty,c2ty; 2378 elem *n1; 2379 elem *n2; 2380 2381 assert(pn); 2382 elem* n = *pn; 2383 assert(biv && n); 2384 const op = n.Eoper; 2385 if (OTunary(op)) 2386 { 2387 ivfamelems(biv,&n.EV.E1); 2388 n1 = n.EV.E1; 2389 n2 = null; 2390 } 2391 else if (OTbinary(op)) 2392 { 2393 ivfamelems(biv,&n.EV.E1); 2394 ivfamelems(biv,&n.EV.E2); /* LTOR or RTOL order is unimportant */ 2395 n1 = n.EV.E1; 2396 n2 = n.EV.E2; 2397 } 2398 else /* else leaf elem */ 2399 return; /* which can't be in the family */ 2400 2401 if (tycomplex(n.Ety)) 2402 return; 2403 2404 if (op == OPmul || op == OPadd || op == OPmin || 2405 op == OPneg || op == OPshl) 2406 { /* Note that we are wimping out and not considering */ 2407 /* LI variables as part of c1 and c2, but only constants. */ 2408 2409 ty = n.Ety; 2410 2411 /* Since trees are canonicalized, basic induction variables */ 2412 /* will only appear on the left. */ 2413 2414 /* Improvement: */ 2415 /* We wish to pick up the cases (biv + li), (biv - li) and */ 2416 /* (li + biv). OPmul and LS with bivs are out, since if we */ 2417 /* try to eliminate the biv, and the loop test is a >, >=, */ 2418 /* <, <=, we have a problem since we don't know if the li */ 2419 /* is negative. (Would have to call swaprel() on it.) */ 2420 2421 /* If we have (li + var), swap the leaves. */ 2422 if (op == OPadd && isLI(n1) && n1.Eoper == OPvar && n2.Eoper == OPvar) 2423 { 2424 n.EV.E1 = n2; 2425 n2 = n.EV.E2 = n1; 2426 n1 = n.EV.E1; 2427 } 2428 2429 // Get rid of case where we painted a far pointer to a long 2430 if (op == OPadd || op == OPmin) 2431 { 2432 int sz; 2433 2434 sz = tysize(ty); 2435 if (sz == tysize(TYfptr) && !tyfv(ty) && 2436 (sz != tysize(n1.Ety) || sz != tysize(n2.Ety))) 2437 return; 2438 } 2439 2440 /* Look for function of basic IV (-biv or biv op const) */ 2441 if (n1.Eoper == OPvar && n1.EV.Vsym == biv.IVbasic) 2442 { 2443 if (op == OPneg) 2444 { 2445 if (debugc) printf("found (-biv), elem %p\n",n); 2446 auto fl = biv.IVfamily.push(); 2447 newfamlist(fl, ty); 2448 fl.FLivty = n1.Ety; 2449 fl.FLpelem = pn; 2450 fl.c1 = el_una(op,ty,fl.c1); /* c1 = -1 */ 2451 } 2452 else if (n2.Eoper == OPconst || 2453 isLI(n2) && (op == OPadd || op == OPmin)) 2454 { 2455 debug 2456 if (debugc) 2457 { printf("found (biv op const), elem ("); 2458 WReqn(n); 2459 printf(");\n"); 2460 printf("Types: n1="); WRTYxx(n1.Ety); 2461 printf(" ty="); WRTYxx(ty); 2462 printf(" n2="); WRTYxx(n2.Ety); 2463 printf("\n"); 2464 } 2465 2466 auto fl = biv.IVfamily.push(); 2467 newfamlist(fl, ty); 2468 fl.FLivty = n1.Ety; 2469 fl.FLpelem = pn; 2470 switch (op) 2471 { 2472 case OPadd: /* c2 = right */ 2473 c2ty = n2.Ety; 2474 if (typtr(fl.c2.Ety)) 2475 c2ty = fl.c2.Ety; 2476 goto L1; 2477 2478 case OPmin: /* c2 = -right */ 2479 c2ty = fl.c2.Ety; 2480 /* Check for subtracting two pointers */ 2481 if (typtr(c2ty) && typtr(n2.Ety)) 2482 { 2483 if (tybasic(c2ty) == TYhptr) 2484 c2ty = TYlong; 2485 else 2486 c2ty = I64 ? TYllong : TYint; 2487 } 2488 L1: 2489 fl.c2 = el_bin(op,c2ty,fl.c2,el_copytree(n2)); 2490 break; 2491 2492 case OPmul: /* c1 = right */ 2493 case OPshl: /* c1 = 1 << right */ 2494 fl.c1 = el_bin(op,ty,fl.c1,el_copytree(n2)); 2495 break; 2496 2497 default: 2498 assert(0); 2499 } 2500 } 2501 } 2502 2503 /* Look for function of existing IV */ 2504 2505 foreach (ref fl; biv.IVfamily) 2506 { 2507 if (*fl.FLpelem != n1) /* not it */ 2508 continue; 2509 2510 /* Look for (fl op constant) */ 2511 if (op == OPneg) 2512 { 2513 if (debugc) printf("found (-fl), elem %p\n",n); 2514 /* c1 = -c1; c2 = -c2; */ 2515 fl.c1 = el_una(OPneg,ty,fl.c1); 2516 fl.c2 = el_una(OPneg,ty,fl.c2); 2517 fl.FLty = ty; 2518 fl.FLpelem = pn; /* replace with new IV */ 2519 } 2520 else if (n2.Eoper == OPconst || 2521 isLI(n2) && (op == OPadd || op == OPmin)) 2522 { 2523 debug 2524 if (debugc) 2525 { 2526 printf("found (fl op const), elem ("); 2527 WReqn(n); 2528 assert(*pn == n); 2529 printf(");\n"); 2530 elem_print(n); 2531 } 2532 2533 switch (op) 2534 { 2535 case OPmul: 2536 case OPshl: 2537 fl.c1 = el_bin(op,ty,fl.c1,el_copytree(n2)); 2538 break; 2539 2540 case OPadd: 2541 case OPmin: 2542 break; 2543 2544 default: 2545 assert(0); 2546 } 2547 fl.c2 = el_bin(op,ty,fl.c2,el_copytree(n2)); 2548 fl.FLty = ty; 2549 fl.FLpelem = pn; /* replace with new IV */ 2550 } /* else if */ 2551 } /* for */ 2552 } /* if */ 2553 } 2554 2555 /********************************* 2556 * Eliminate frivolous family ivs, that is, 2557 * if we can't eliminate the BIV, then eliminate family ivs that 2558 * differ from it only by a constant. 2559 */ 2560 2561 private void elimfrivivs(ref loop l) 2562 { 2563 foreach (ref biv; l.Livlist) 2564 { 2565 int nrefs; 2566 2567 if (debugc) printf("elimfrivivs()\n"); 2568 /* Compute number of family ivs for biv */ 2569 const nfams = biv.IVfamily.length; 2570 if (debugc) printf("nfams = %d\n", cast(int)nfams); 2571 2572 /* Compute number of references to biv */ 2573 if (onlyref(biv.IVbasic,l,*biv.IVincr,&nrefs)) 2574 nrefs--; 2575 if (debugc) printf("nrefs = %d\n",nrefs); 2576 assert(nrefs + 1 >= nfams); 2577 if (nrefs > nfams || // if we won't eliminate the biv 2578 (!I16 && nrefs == nfams)) 2579 { /* Eliminate any family ivs that only differ by a constant */ 2580 /* from biv */ 2581 foreach (ref fl; biv.IVfamily) 2582 { 2583 elem *ec1 = fl.c1; 2584 targ_llong c; 2585 2586 if (elemisone(ec1) || 2587 // Eliminate fl's that can be represented by 2588 // an addressing mode 2589 (!I16 && ec1.Eoper == OPconst && tyintegral(ec1.Ety) && 2590 ((c = el_tolong(ec1)) == 2 || c == 4 || c == 8) 2591 ) 2592 ) 2593 { 2594 fl.FLtemp = FLELIM; 2595 2596 debug 2597 if (debugc) 2598 { 2599 printf("Eliminating frivolous IV "); 2600 WReqn(*fl.FLpelem); 2601 printf("\n"); 2602 } 2603 } 2604 } 2605 } 2606 } 2607 } 2608 2609 2610 /****************************** 2611 * Introduce new variables. 2612 */ 2613 2614 private void intronvars(ref loop l) 2615 { 2616 elem *T; 2617 elem *ne; 2618 elem *t2; 2619 elem *C2; 2620 elem *cmul; 2621 tym_t ty,tyr; 2622 2623 if (debugc) printf("intronvars(%p)\n", &l); 2624 foreach (ref biv; l.Livlist) 2625 { 2626 elem *bivinc = *biv.IVincr; /* ptr to increment elem */ 2627 2628 foreach (ref fl; biv.IVfamily) 2629 { /* for each IV in family of biv */ 2630 if (fl.FLtemp == FLELIM) /* if already eliminated */ 2631 continue; 2632 2633 /* If induction variable can be written as a simple function */ 2634 /* of a previous induction variable, skip it. */ 2635 if (funcprev(biv,fl)) 2636 continue; 2637 2638 ty = fl.FLty; 2639 T = el_alloctmp(ty); /* allocate temporary T */ 2640 fl.FLtemp = T.EV.Vsym; 2641 2642 debug 2643 { 2644 if (debugc) printf("intronvars() introduced new variable '%s' of type ",T.EV.Vsym.Sident.ptr); 2645 if (debugc) WRTYxx(ty); 2646 if (debugc) printf("\n"); 2647 } 2648 2649 /* append elem T=biv*C1+C2 to preheader */ 2650 /* ne = biv*C1 */ 2651 tyr = fl.FLivty; /* type of biv */ 2652 ne = el_var(biv.IVbasic); 2653 ne.Ety = tyr; 2654 if (!elemisone(fl.c1)) /* don't multiply ptrs by 1 */ 2655 ne = el_bin(OPmul,tyr,ne,el_copytree(fl.c1)); 2656 if (tyfv(tyr) && tysize(ty) == SHORTSIZE) 2657 ne = el_una(OP32_16,ty,ne); 2658 C2 = el_copytree(fl.c2); 2659 t2 = el_bin(OPadd,ty,ne,C2); /* t2 = ne + C2 */ 2660 ne = el_bin(OPeq,ty,el_copytree(T),t2); 2661 appendelem(ne, &(l.Lpreheader.Belem)); 2662 2663 /* prefix T+=C1*C to elem biv+=C */ 2664 /* Must prefix in case the value of the expression (biv+=C) */ 2665 /* is used by somebody up the tree. */ 2666 cmul = el_bin(OPmul,fl.c1.Ety,el_copytree(fl.c1), 2667 el_copytree(bivinc.EV.E2)); 2668 t2 = el_bin(bivinc.Eoper,ty,el_copytree(T),cmul); 2669 t2 = doptelem(t2,GOALvalue | GOALagain); 2670 *biv.IVincr = el_bin(OPcomma,bivinc.Ety,t2,bivinc); 2671 biv.IVincr = &((*biv.IVincr).EV.E2); 2672 2673 debug 2674 if (debugc) 2675 { 2676 printf("Replacing elem ("); 2677 WReqn(*fl.FLpelem); 2678 printf(") with '%s'\n",T.EV.Vsym.Sident.ptr); 2679 printf("The init elem is ("); 2680 WReqn(ne); 2681 printf(");\nThe increment elem is ("); 2682 WReqn(t2); 2683 printf(")\n"); 2684 } 2685 2686 el_free(*fl.FLpelem); 2687 *fl.FLpelem = T; /* replace elem n with ref to T */ 2688 doflow = true; /* redo flow analysis */ 2689 go.changes++; 2690 } /* for */ 2691 } /* for */ 2692 } 2693 2694 /******************************* 2695 * Determine if induction variable can be rewritten as a simple 2696 * function of a previously generated temporary. 2697 * This can frequently 2698 * generate less code than that of an all-new temporary (especially 2699 * if it is the same as a previous temporary!). 2700 * Input: 2701 * biv Basic induction variable 2702 * fl Item in biv's family list we are looking at 2703 * Returns: 2704 * false Caller should create a new induction variable. 2705 * true *FLpelem is replaced with function of a previous 2706 * induction variable. FLtemp is set to FLELIM to 2707 * indicate this. 2708 */ 2709 2710 private bool funcprev(ref Iv biv, ref famlist fl) 2711 { 2712 tym_t tymin; 2713 int sz; 2714 elem *e1; 2715 elem *e2; 2716 elem *flse1; 2717 2718 debug if (debugc) 2719 printf("funcprev\n"); 2720 2721 foreach (ref fls; biv.IVfamily) 2722 { 2723 if (!fls.FLtemp) // haven't generated a temporary yet 2724 continue; 2725 if (fls.FLtemp == FLELIM) /* no iv we can use here */ 2726 continue; 2727 2728 /* The multipliers must match */ 2729 if (!el_match(fls.c1,fl.c1)) 2730 continue; 2731 2732 /* If the c2's match also, we got it easy */ 2733 if (el_match(fls.c2,fl.c2)) 2734 { 2735 if (tysize(fl.FLty) > tysize(fls.FLtemp.ty())) 2736 continue; /* can't increase size of var */ 2737 flse1 = el_var(fls.FLtemp); 2738 flse1.Ety = fl.FLty; 2739 goto L2; 2740 } 2741 2742 /* The difference is only in the addition. Therefore, replace 2743 *fl.FLpelem with: 2744 case 1: (fl.c2 + (fls.FLtemp - fls.c2)) 2745 case 2: (fls.FLtemp + (fl.c2 - fls.c2)) 2746 */ 2747 e1 = fl.c2; 2748 /* Subtracting relocatables usually generates slow code for */ 2749 /* linkers that can't handle arithmetic on relocatables. */ 2750 if (typtr(fls.c2.Ety)) 2751 { 2752 if (fls.c2.Eoper == OPrelconst && 2753 !(fl.c2.Eoper == OPrelconst && 2754 fl.c2.EV.Vsym == fls.c2.EV.Vsym) 2755 ) 2756 continue; 2757 } 2758 flse1 = el_var(fls.FLtemp); 2759 e2 = flse1; /* assume case 1 */ 2760 tymin = e2.Ety; 2761 if (typtr(fls.c2.Ety)) 2762 { 2763 if (!typtr(tymin)) 2764 { 2765 if (typtr(e1.Ety)) 2766 { 2767 e1 = e2; 2768 e2 = fl.c2; /* case 2 */ 2769 } 2770 else /* can't subtract fptr */ 2771 goto L1; 2772 } 2773 if (tybasic(fls.c2.Ety) == TYhptr) 2774 tymin = TYlong; 2775 else 2776 tymin = I64 ? TYllong : TYint; /* type of (ptr - ptr) */ 2777 } 2778 2779 /* If e1 and fls.c2 are fptrs, and are not from the same */ 2780 /* segment, we cannot subtract them. */ 2781 if (tyfv(e1.Ety) && tyfv(fls.c2.Ety)) 2782 { 2783 if (e1.Eoper != OPrelconst || fls.c2.Eoper != OPrelconst) 2784 goto L1; /* assume expressions have diff segs */ 2785 if (e1.EV.Vsym.Sclass != fls.c2.EV.Vsym.Sclass) 2786 { 2787 L1: 2788 el_free(flse1); 2789 continue; 2790 } 2791 } 2792 2793 /* Some more type checking... */ 2794 sz = tysize(fl.FLty); 2795 if (sz != tysize(e1.Ety) && 2796 sz != tysize(tymin)) 2797 goto L1; 2798 2799 /* Do some type checking (can't add pointers and get a pointer!) */ 2800 //if (typtr(fl.FLty) && typtr(e1.Ety) && typtr(tymin)) 2801 //goto L1; 2802 /* Construct (e1 + (e2 - fls.c2)) */ 2803 flse1 = el_bin(OPadd,fl.FLty, 2804 e1, 2805 el_bin(OPmin,tymin, 2806 e2, 2807 el_copytree(fls.c2))); 2808 if (sz < tysize(tymin) && sz == tysize(e1.Ety)) 2809 { 2810 assert(I16); 2811 flse1.EV.E2 = el_una(OPoffset,fl.FLty,flse1.EV.E2); 2812 } 2813 2814 flse1 = doptelem(flse1,GOALvalue | GOALagain); 2815 fl.c2 = null; 2816 L2: 2817 debug if (debugc) 2818 { 2819 printf("Replacing "); 2820 WReqn(*fl.FLpelem); 2821 printf(" with "); 2822 WReqn(flse1); 2823 printf("\n"); 2824 } 2825 2826 el_free(*fl.FLpelem); 2827 *fl.FLpelem = flse1; 2828 2829 /* Fix the iv so when we do loops again, we won't create */ 2830 /* yet another iv, which is just what funcprev() is supposed */ 2831 /* to prevent. */ 2832 fls.FLtemp.Sflags |= SFLnotbasiciv; 2833 2834 fl.FLtemp = FLELIM; /* mark iv as being gone */ 2835 go.changes++; 2836 doflow = true; 2837 return true; /* it was replaced */ 2838 } 2839 return false; /* need to create a new variable */ 2840 } 2841 2842 /*********************** 2843 * Eliminate basic IVs. 2844 */ 2845 2846 private void elimbasivs(ref loop l) 2847 { 2848 if (debugc) printf("elimbasivs(%p)\n", &l); 2849 foreach (ref biv; l.Livlist) 2850 { 2851 /* Can't eliminate this basic IV if we have a goal for the */ 2852 /* increment elem. */ 2853 // Be careful about Nflags being in a union... 2854 elem* einc = *biv.IVincr; 2855 if (!(einc.Nflags & NFLnogoal)) 2856 continue; 2857 2858 Symbol* X = biv.IVbasic; 2859 assert(symbol_isintab(X)); 2860 tym_t ty = X.ty(); 2861 int refcount; 2862 elem** pref = onlyref(X,l,einc,&refcount); 2863 2864 /* if only ref of X is of the form (X) or (X relop e) or (e relop X) */ 2865 if (pref != null && refcount <= 1) 2866 { 2867 if (!biv.IVfamily.length) 2868 continue; 2869 2870 elem* ref_ = *pref; 2871 2872 /* Replace (X) with (X != 0) */ 2873 if (ref_.Eoper == OPvar) 2874 ref_ = *pref = el_bin(OPne,TYint,ref_,el_long(ref_.Ety,0L)); 2875 2876 const fi = simfl(biv.IVfamily, ty); // find simplest elem in family 2877 if (fi == biv.IVfamily.length) 2878 continue; 2879 famlist* fl = &biv.IVfamily[fi]; 2880 2881 // Don't do the replacement if we would replace a 2882 // signed comparison with an unsigned one 2883 tym_t flty = fl.FLty; 2884 if (tyuns(ref_.EV.E1.Ety) | tyuns(ref_.EV.E2.Ety)) 2885 flty = touns(flty); 2886 2887 if (ref_.Eoper >= OPle && ref_.Eoper <= OPge && 2888 !(tyuns(ref_.EV.E1.Ety) | tyuns(ref_.EV.E2.Ety)) && 2889 tyuns(flty)) 2890 continue; 2891 2892 /* if we have (e relop X), replace it with (X relop e) */ 2893 if (ref_.EV.E2.Eoper == OPvar && ref_.EV.E2.EV.Vsym == X) 2894 { 2895 elem* tmp = ref_.EV.E2; 2896 ref_.EV.E2 = ref_.EV.E1; 2897 ref_.EV.E1 = tmp; 2898 ref_.Eoper = cast(ubyte)swaprel(ref_.Eoper); 2899 } 2900 2901 // If e*c1+c2 would result in a sign change or an overflow 2902 // then we can't do it 2903 if (fl.c1.Eoper == OPconst) 2904 { 2905 targ_llong c1 = el_tolong(fl.c1); 2906 const int sz = tysize(ty); 2907 if (sz == SHORTSIZE && 2908 ((ref_.EV.E2.Eoper == OPconst && 2909 c1 * el_tolong(ref_.EV.E2) & ~0x7FFFL) || 2910 c1 & ~0x7FFFL) 2911 ) 2912 continue; 2913 2914 if (sz == LONGSIZE && 2915 ((ref_.EV.E2.Eoper == OPconst && 2916 c1 * el_tolong(ref_.EV.E2) & ~0x7FFFFFFFL) || 2917 c1 & ~0x7FFFFFFFL) 2918 ) 2919 continue; 2920 if (sz == LLONGSIZE && 2921 ((ref_.EV.E2.Eoper == OPconst && 2922 c1 * el_tolong(ref_.EV.E2) & ~0x7FFFFFFFFFFFFFFFL) || 2923 c1 & ~0x7FFFFFFFFFFFFFFFL) 2924 ) 2925 continue; 2926 } 2927 2928 /* If the incr is a decrement, and the relational is < or <=, 2929 * and its unsigned, then don't do it because it could drop below 0. 2930 * https://issues.dlang.org/show_bug.cgi?id=16189 2931 */ 2932 if ((einc.Eoper == OPminass || einc.EV.E2.Eoper == OPconst && el_tolong(einc.EV.E2) < 0) && 2933 (ref_.Eoper == OPlt || ref_.Eoper == OPle) && 2934 (tyuns(ref_.EV.E1.Ety) | tyuns(ref_.EV.E2.Ety))) 2935 continue; 2936 2937 /* If loop started out with a signed conditional that was 2938 * replaced with an unsigned one, don't do it if c2 2939 * is less than 0. 2940 */ 2941 if (ref_.Nflags & NFLtouns && fl.c2.Eoper == OPconst) 2942 { 2943 targ_llong c2 = el_tolong(fl.c2); 2944 if (c2 < 0) 2945 continue; 2946 } 2947 2948 elem *refE2 = el_copytree(ref_.EV.E2); 2949 int refEoper = ref_.Eoper; 2950 2951 /* if c1 < 0 and relop is < <= > >= 2952 then adjust relop as if both sides were multiplied 2953 by -1 2954 */ 2955 if (!tyuns(ty) && 2956 (tyintegral(ty) && el_tolong(fl.c1) < 0 || 2957 tyfloating(ty) && el_toldoubled(fl.c1) < 0.0)) 2958 refEoper = swaprel(refEoper); 2959 2960 /* Replace (X relop e) with (X relop (short)e) 2961 if T is 1 word but e is 2 2962 */ 2963 if (tysize(flty) == SHORTSIZE && 2964 tysize(refE2.Ety) == LONGSIZE) 2965 refE2 = el_una(OP32_16,flty,refE2); 2966 2967 /* replace e with e*c1 + c2 */ 2968 elem* C2 = el_copytree(fl.c2); 2969 elem* fofe = el_bin(OPadd,flty, 2970 el_bin(OPmul,refE2.Ety, 2971 refE2, 2972 el_copytree(fl.c1)), 2973 C2); 2974 fofe = doptelem(fofe,GOALvalue | GOALagain); // fold any constants 2975 2976 if (tyuns(flty) && refEoper == OPge && 2977 fofe.Eoper == OPconst && el_allbits(fofe, 0) && 2978 fl.c2.Eoper == OPconst && !el_allbits(fl.c2, 0)) 2979 { 2980 /* Don't do it if replacement will result in 2981 * an unsigned T>=0 which will be an infinite loop. 2982 */ 2983 el_free(fofe); 2984 continue; 2985 } 2986 2987 if (debugc) printf("Eliminating basic IV '%s'\n",X.Sident.ptr); 2988 2989 debug if (debugc) 2990 { 2991 printf("Comparison replaced: "); 2992 WReqn(ref_); 2993 printf(" with "); 2994 } 2995 2996 el_free(ref_.EV.E2); 2997 ref_.EV.E2 = refE2; 2998 ref_.Eoper = cast(ubyte)refEoper; 2999 3000 elimass(einc); // dump the increment elem 3001 3002 // replace X with T 3003 assert(ref_.EV.E1.EV.Voffset == 0); 3004 ref_.EV.E1.EV.Vsym = fl.FLtemp; 3005 ref_.EV.E1.Ety = flty; 3006 ref_.EV.E2 = fofe; 3007 3008 /* If sizes of expression worked out wrong... 3009 Which can happen if we have (int)ptr==e 3010 */ 3011 if (OTbinary(fofe.Eoper)) // if didn't optimize it away 3012 { 3013 const tym_t fofety = fofe.Ety; 3014 const int sz = tysize(fofety); 3015 tym_t ty1 = fofe.EV.E1.Ety; 3016 const tym_t ty2 = fofe.EV.E2.Ety; 3017 /* Sizes of + expression must all be the same */ 3018 if (sz != tysize(ty1) && 3019 sz != tysize(ty2) 3020 ) 3021 { 3022 if (tyuns(fofety)) // if unsigned comparison 3023 ty1 = touns(ty1); /* to unsigned type */ 3024 fofe.Ety = ty1; 3025 ref_.EV.E1.Ety = ty1; 3026 } 3027 } 3028 3029 /* Fix if leaves of compare are TYfptrs and the compare */ 3030 /* operator is < <= > >=. */ 3031 if (ref_.Eoper >= OPle && ref_.Eoper <= OPge && tyfv(ref_.EV.E1.Ety)) 3032 { 3033 assert(tyfv(ref_.EV.E2.Ety)); 3034 ref_.EV.E1 = el_una(OPoffset,TYuint,ref_.EV.E1); 3035 ref_.EV.E2 = el_una(OPoffset,TYuint,fofe); 3036 } 3037 3038 debug if (debugc) 3039 { 3040 WReqn(ref_); 3041 printf("\n"); 3042 } 3043 3044 go.changes++; 3045 doflow = true; /* redo flow analysis */ 3046 3047 /* if X is live on entry to any successor S outside loop */ 3048 /* prepend elem X=(T-c2)/c1 to S.Belem */ 3049 3050 for (uint i = 0; (i = cast(uint) vec_index(i, l.Lexit)) < dfo.length; ++i) // for each exit block 3051 { 3052 elem *ne; 3053 block *b; 3054 3055 foreach (bl; ListRange(dfo[i].Bsucc)) 3056 { /* for each successor */ 3057 b = list_block(bl); 3058 if (vec_testbit(b.Bdfoidx,l.Lloop)) 3059 continue; /* inside loop */ 3060 if (!vec_testbit(X.Ssymnum,b.Binlv)) 3061 continue; /* not live */ 3062 3063 C2 = el_copytree(fl.c2); 3064 ne = el_bin(OPmin,ty, 3065 el_var(fl.FLtemp), 3066 C2); 3067 if (tybasic(ne.EV.E1.Ety) == TYfptr && 3068 tybasic(ne.EV.E2.Ety) == TYfptr) 3069 { 3070 ne.Ety = I64 ? TYllong : TYint; 3071 if (tylong(ty) && _tysize[TYint] == 2) 3072 ne = el_una(OPs16_32,ty,ne); 3073 } 3074 3075 ne = el_bin(OPeq,X.ty(), 3076 el_var(X), 3077 el_bin(OPdiv,ne.Ety, 3078 ne, 3079 el_copytree(fl.c1))); 3080 3081 debug if (debugc) 3082 { 3083 printf("Adding ("); 3084 WReqn(ne); 3085 printf(") to exit block B%d\n",b.Bdfoidx); 3086 //elem_print(ne); 3087 } 3088 3089 /* We have to add a new block if there is */ 3090 /* more than one predecessor to b. */ 3091 if (list_next(b.Bpred)) 3092 { 3093 block *bn = block_calloc(); 3094 bn.Btry = b.Btry; 3095 bn.BC = BCgoto; 3096 bn.Bnext = dfo[i].Bnext; 3097 dfo[i].Bnext = bn; 3098 list_append(&(bn.Bsucc),b); 3099 list_append(&(bn.Bpred),dfo[i]); 3100 bl.ptr = cast(void *)bn; 3101 foreach (bl2; ListRange(b.Bpred)) 3102 if (list_block(bl2) == dfo[i]) 3103 { 3104 bl2.ptr = cast(void *)bn; 3105 goto L2; 3106 } 3107 assert(0); 3108 L2: 3109 b = bn; 3110 addblk = true; 3111 } 3112 3113 if (b.Belem) 3114 b.Belem = 3115 el_bin(OPcomma,b.Belem.Ety, 3116 ne,b.Belem); 3117 else 3118 b.Belem = ne; 3119 go.changes++; 3120 doflow = true; /* redo flow analysis */ 3121 } /* for each successor */ 3122 } /* foreach exit block */ 3123 if (addblk) 3124 return; 3125 } 3126 else if (refcount == 0) /* if no uses of IV in loop */ 3127 { 3128 /* Eliminate the basic IV if it is not live on any successor */ 3129 for (uint i = 0; (i = cast(uint) vec_index(i, l.Lexit)) < dfo.length; ++i) // for each exit block 3130 { 3131 foreach (bl; ListRange(dfo[i].Bsucc)) 3132 { /* for each successor */ 3133 block *b = list_block(bl); 3134 if (vec_testbit(b.Bdfoidx,l.Lloop)) 3135 continue; /* inside loop */ 3136 if (vec_testbit(X.Ssymnum,b.Binlv)) 3137 goto L1; /* live */ 3138 } 3139 } 3140 3141 if (debugc) printf("No uses, eliminating basic IV '%s' (%p)\n",X.Sident.ptr,X); 3142 3143 /* Remove the (s op= e2) by replacing it with (1 , e2) 3144 * and let later passes remove the (1 ,) nodes. 3145 * Don't remove those nodes here because other biv's may refer 3146 * to them. 3147 */ 3148 { 3149 elem* ei = *biv.IVincr; 3150 ei.Eoper = OPcomma; 3151 ei.EV.E1.Eoper = OPconst; 3152 ei.EV.E1.Ety = TYint; 3153 } 3154 3155 go.changes++; 3156 doflow = true; /* redo flow analysis */ 3157 L1: 3158 } 3159 } /* for */ 3160 } 3161 3162 3163 /*********************** 3164 * Eliminate opeq IVs that are not used outside the loop. 3165 */ 3166 3167 private void elimopeqs(ref loop l) 3168 { 3169 elem **pref; 3170 Symbol *X; 3171 int refcount; 3172 3173 if (debugc) printf("elimopeqs(%p)\n", &l); 3174 //foreach (ref biv; l.Lopeqlist) elem_print(*(biv.IVincr)); 3175 3176 foreach (ref biv; l.Lopeqlist) 3177 { 3178 // Can't eliminate this basic IV if we have a goal for the 3179 // increment elem. 3180 // Be careful about Nflags being in a union... 3181 if (!((*biv.IVincr).Nflags & NFLnogoal)) 3182 continue; 3183 3184 X = biv.IVbasic; 3185 assert(symbol_isintab(X)); 3186 pref = onlyref(X,l,*biv.IVincr,&refcount); 3187 3188 // if only ref of X is of the form (X) or (X relop e) or (e relop X) 3189 if (pref != null && refcount <= 1) 3190 { } 3191 else if (refcount == 0) // if no uses of IV in loop 3192 { // Eliminate the basic IV if it is not live on any successor 3193 uint i; 3194 for (i = 0; (i = cast(uint) vec_index(i, l.Lexit)) < dfo.length; ++i) // for each exit block 3195 { 3196 foreach (bl; ListRange(dfo[i].Bsucc)) 3197 { // for each successor 3198 block *b = list_block(bl); 3199 if (vec_testbit(b.Bdfoidx,l.Lloop)) 3200 continue; // inside loop 3201 if (vec_testbit(X.Ssymnum,b.Binlv)) 3202 goto L1; // live 3203 } 3204 } 3205 3206 if (debugc) printf("No uses, eliminating opeq IV '%s' (%p)\n",X.Sident.ptr,X); 3207 3208 /* Remove the (s op= e2) by replacing it with (1 , e2) 3209 * and let later passes remove the (1 ,) nodes. 3210 * Don't remove those nodes here because other biv's may refer 3211 * to them, for nodes like (sum += i++) 3212 */ 3213 { 3214 elem* einc = *(biv.IVincr); 3215 einc.Eoper = OPcomma; 3216 einc.EV.E1.Eoper = OPconst; 3217 einc.EV.E1.Ety = TYint; 3218 } 3219 3220 go.changes++; 3221 doflow = true; // redo flow analysis 3222 L1: 3223 } 3224 } 3225 } 3226 3227 /************************** 3228 * Find simplest elem in family. 3229 * Params: 3230 * fams = array of famlist's 3231 * tym = type of basic IV 3232 * Returns: index into fams[] of simplest; fams.length if none found. 3233 */ 3234 3235 extern (D) 3236 private size_t simfl(famlist[] fams, tym_t tym) 3237 { 3238 size_t sofar = fams.length; 3239 3240 foreach (i, ref fl; fams) 3241 { 3242 if (fl.FLtemp == FLELIM) /* no variable, so skip it */ 3243 continue; 3244 /* If size of replacement is less than size of biv, we could */ 3245 /* be in trouble due to loss of precision. */ 3246 if (size(fl.FLtemp.ty()) < size(tym)) 3247 continue; 3248 3249 // pick simplest 3250 sofar = sofar == fams.length ? i 3251 : (flcmp(fams[sofar], fams[i]) ? sofar : i); 3252 } 3253 return sofar; 3254 } 3255 3256 /************************** 3257 * Return simpler of two family elems. 3258 * There is room for improvement, namely if 3259 * f1.c1 = 2, f2.c1 = 27 3260 * then pick f1 because it is a shift. 3261 * Returns: 3262 * true for f1 is simpler, false for f2 is simpler 3263 */ 3264 3265 private bool flcmp(ref famlist f1, ref famlist f2) 3266 { 3267 auto t1 = &(f1.c1.EV); 3268 auto t2 = &(f2.c1.EV); 3269 auto ty = (*f1.FLpelem).Ety; // type of elem 3270 3271 static if (0) 3272 { 3273 printf("f1: c1 = %d, c2 = %d\n",t1.Vshort,f1.c2.EV.Vshort); 3274 printf("f2: c1 = %d, c2 = %d\n",t2.Vshort,f2.c2.EV.Vshort); 3275 WRTYxx((*f1.FLpelem).Ety); 3276 WRTYxx((*f2.FLpelem).Ety); 3277 } 3278 3279 /* Wimp out and just pick f1 if the types don't match */ 3280 if (tysize(ty) == tysize((*f2.FLpelem).Ety)) 3281 { 3282 switch (tybasic(ty)) 3283 { case TYbool: 3284 case TYchar: 3285 case TYschar: 3286 case TYuchar: 3287 if (t2.Vuchar == 1 || 3288 t1.Vuchar != 1 && f2.c2.EV.Vuchar == 0) 3289 goto Lf2; 3290 break; 3291 3292 case TYshort: 3293 case TYushort: 3294 case TYchar16: 3295 case TYwchar_t: // BUG: what about 4 byte wchar_t's? 3296 case_short: 3297 if (t2.Vshort == 1 || 3298 t1.Vshort != 1 && f2.c2.EV.Vshort == 0) 3299 goto Lf2; 3300 break; 3301 3302 case TYsptr: 3303 case TYcptr: 3304 case TYnptr: // BUG: 64 bit pointers? 3305 case TYimmutPtr: 3306 case TYsharePtr: 3307 case TYrestrictPtr: 3308 case TYfgPtr: 3309 case TYnullptr: 3310 case TYint: 3311 case TYuint: 3312 if (_tysize[TYint] == SHORTSIZE) 3313 goto case_short; 3314 else 3315 goto case_long; 3316 3317 case TYlong: 3318 case TYulong: 3319 case TYdchar: 3320 case TYfptr: 3321 case TYvptr: 3322 case TYhptr: 3323 case_long: 3324 if (t2.Vlong == 1 || 3325 t1.Vlong != 1 && f2.c2.EV.Vlong == 0) 3326 goto Lf2; 3327 break; 3328 3329 case TYfloat: 3330 if (t2.Vfloat == 1 || 3331 t1.Vfloat != 1 && f2.c2.EV.Vfloat == 0) 3332 goto Lf2; 3333 break; 3334 3335 case TYdouble: 3336 case TYdouble_alias: 3337 if (t2.Vdouble == 1.0 || 3338 t1.Vdouble != 1.0 && f2.c2.EV.Vdouble == 0) 3339 goto Lf2; 3340 break; 3341 3342 case TYldouble: 3343 if (t2.Vldouble == 1.0 || 3344 t1.Vldouble != 1.0 && f2.c2.EV.Vldouble == 0) 3345 goto Lf2; 3346 break; 3347 3348 case TYllong: 3349 case TYullong: 3350 if (t2.Vllong == 1 || 3351 t1.Vllong != 1 && f2.c2.EV.Vllong == 0) 3352 goto Lf2; 3353 break; 3354 3355 default: 3356 assert(0); 3357 } 3358 } 3359 //printf("picking f1\n"); 3360 return true; 3361 3362 Lf2: 3363 //printf("picking f2\n"); 3364 return false; 3365 } 3366 3367 /************************************ 3368 * Input: 3369 * x basic IV symbol 3370 * incn increment elem for basic IV X. 3371 * Output: 3372 * *prefcount # of references to X other than the increment elem 3373 * Returns: 3374 * If ref of X in loop l is of the form (X relop e) or (e relop X) 3375 * Return the relop elem 3376 * Else 3377 * Return null 3378 */ 3379 3380 private __gshared 3381 { 3382 int count; 3383 elem **nd; 3384 elem *sincn; 3385 Symbol *X; 3386 } 3387 3388 private elem ** onlyref(Symbol *x, ref loop l,elem *incn,int *prefcount) 3389 { 3390 uint i; 3391 3392 //printf("onlyref('%s')\n", x.Sident.ptr); 3393 X = x; /* save some parameter passing */ 3394 assert(symbol_isintab(x)); 3395 sincn = incn; 3396 3397 debug 3398 if (!(X.Ssymnum < globsym.length && incn)) 3399 printf("X = %d, globsym.length = %d, l = %p, incn = %p\n",cast(int) X.Ssymnum,cast(int) globsym.length,&l,incn); 3400 3401 assert(X.Ssymnum < globsym.length && incn); 3402 count = 0; 3403 nd = null; 3404 for (i = 0; (i = cast(uint) vec_index(i, l.Lloop)) < dfo.length; ++i) // for each block in loop 3405 { 3406 block *b; 3407 3408 b = dfo[i]; 3409 if (b.Belem) 3410 { 3411 countrefs(&b.Belem,b.BC == BCiftrue); 3412 } 3413 } 3414 3415 static if (0) 3416 { 3417 printf("count = %d, nd = ("); 3418 if (nd) WReqn(*nd); 3419 printf(")\n"); 3420 } 3421 3422 *prefcount = count; 3423 return nd; 3424 } 3425 3426 3427 /****************************** 3428 * Count elems of the form (X relop e) or (e relop X). 3429 * Do not count the node if it is the increment node (sincn). 3430 * Input: 3431 * flag: true if block wants to test the elem 3432 */ 3433 3434 private void countrefs(elem **pn,bool flag) 3435 { 3436 elem *n = *pn; 3437 3438 assert(n); 3439 if (n == sincn) /* if it is the increment elem */ 3440 { 3441 if (OTbinary(n.Eoper)) 3442 countrefs(&n.EV.E2, false); 3443 return; // don't count lvalue 3444 } 3445 if (OTunary(n.Eoper)) 3446 countrefs(&n.EV.E1,false); 3447 else if (OTbinary(n.Eoper)) 3448 { 3449 if (OTrel(n.Eoper)) 3450 { 3451 elem *e1 = n.EV.E1; 3452 3453 assert(e1.Eoper != OPcomma); 3454 if (e1 == sincn && 3455 (e1.Eoper == OPeq || OTopeq(e1.Eoper))) 3456 goto L1; 3457 3458 /* Check both subtrees to see if n is the comparison node, 3459 * that is, if X is a leaf of the comparison. 3460 */ 3461 if (e1.Eoper == OPvar && e1.EV.Vsym == X && !countrefs2(n.EV.E2) || 3462 n.EV.E2.Eoper == OPvar && n.EV.E2.EV.Vsym == X && !countrefs2(e1)) 3463 nd = pn; /* found the relop node */ 3464 } 3465 L1: 3466 countrefs(&n.EV.E1,false); 3467 countrefs(&n.EV.E2,(flag && n.Eoper == OPcomma)); 3468 } 3469 else if ((n.Eoper == OPvar || n.Eoper == OPrelconst) && n.EV.Vsym == X) 3470 { 3471 if (flag) 3472 nd = pn; /* comparing it with 0 */ 3473 count++; /* found another reference */ 3474 } 3475 } 3476 3477 /******************************* 3478 * Count number of times symbol X appears in elem tree e. 3479 */ 3480 3481 private int countrefs2(elem *e) 3482 { 3483 elem_debug(e); 3484 while (OTunary(e.Eoper)) 3485 e = e.EV.E1; 3486 if (OTbinary(e.Eoper)) 3487 return countrefs2(e.EV.E1) + countrefs2(e.EV.E2); 3488 return ((e.Eoper == OPvar || e.Eoper == OPrelconst) && 3489 e.EV.Vsym == X); 3490 } 3491 3492 /**************************** 3493 * Eliminate some special cases. 3494 */ 3495 3496 private void elimspec(ref loop l) 3497 { 3498 uint i; 3499 3500 for (i = 0; (i = cast(uint) vec_index(i, l.Lloop)) < dfo.length; ++i) // for each block in loop 3501 { 3502 block *b; 3503 3504 b = dfo[i]; 3505 if (b.Belem) 3506 elimspecwalk(&b.Belem); 3507 } 3508 } 3509 3510 3511 /****************************** 3512 */ 3513 3514 private void elimspecwalk(elem **pn) 3515 { 3516 elem *n; 3517 3518 n = *pn; 3519 assert(n); 3520 if (OTunary(n.Eoper)) 3521 elimspecwalk(&n.EV.E1); 3522 else if (OTbinary(n.Eoper)) 3523 { 3524 elimspecwalk(&n.EV.E1); 3525 elimspecwalk(&n.EV.E2); 3526 if (OTrel(n.Eoper)) 3527 { 3528 elem *e1 = n.EV.E1; 3529 3530 /* Replace ((e1,e2) rel e3) with (e1,(e2 rel e3). 3531 * This will reduce the number of cases for elimbasivs(). 3532 * Don't do equivalent with (e1 rel (e2,e3)) because 3533 * of potential side effects in e1. 3534 */ 3535 if (e1.Eoper == OPcomma) 3536 { 3537 elem *e; 3538 3539 debug if (debugc) 3540 { printf("3rewriting ("); WReqn(n); printf(")\n"); } 3541 3542 e = n.EV.E2; 3543 n.EV.E2 = e1; 3544 n.EV.E1 = n.EV.E2.EV.E1; 3545 n.EV.E2.EV.E1 = n.EV.E2.EV.E2; 3546 n.EV.E2.EV.E2 = e; 3547 n.EV.E2.Eoper = n.Eoper; 3548 n.EV.E2.Ety = n.Ety; 3549 n.Eoper = OPcomma; 3550 3551 go.changes++; 3552 doflow = true; 3553 3554 elimspecwalk(&n.EV.E1); 3555 elimspecwalk(&n.EV.E2); 3556 } 3557 3558 /* Rewrite ((X op= e2) rel e3) into ((X op= e2),(X rel e3)) 3559 * Rewrite ((X ++ e2) rel e3) into ((X += e2),(X-e2 rel e3)) 3560 * so that the op= will not have a goal, so elimbasivs() 3561 * will work on it. 3562 */ 3563 if ((OTopeq(e1.Eoper) 3564 || OTpost(e1.Eoper) 3565 ) && 3566 !el_sideeffect(e1.EV.E1)) 3567 { 3568 elem *e; 3569 OPER op; 3570 3571 debug if (debugc) 3572 { printf("4rewriting ("); WReqn(n); printf(")\n"); } 3573 3574 e = el_calloc(); 3575 el_copy(e,n); 3576 e.EV.E1 = el_copytree(e1.EV.E1); 3577 e.EV.E1.Ety = n.EV.E1.Ety; 3578 n.EV.E2 = e; 3579 switch (e1.Eoper) 3580 { 3581 case OPpostinc: 3582 e1.Eoper = OPaddass; 3583 op = OPmin; 3584 goto L3; 3585 3586 case OPpostdec: 3587 e1.Eoper = OPminass; 3588 op = OPadd; 3589 L3: e.EV.E1 = el_bin(op,e.EV.E1.Ety,e.EV.E1,el_copytree(e1.EV.E2)); 3590 break; 3591 3592 default: 3593 break; 3594 } 3595 /* increment node is now guaranteed to have no goal */ 3596 e1.Nflags |= NFLnogoal; 3597 n.Eoper = OPcomma; 3598 //go.changes++; 3599 doflow = true; 3600 3601 elimspecwalk(&n.EV.E1); 3602 elimspecwalk(&n.EV.E2); 3603 } 3604 } 3605 } 3606 } 3607 3608 /******** 3609 * Walk e in execution order. 3610 * When eincrement is found, remove it. 3611 * Continue, replacing instances of `v` with `v+increment` 3612 * When second eincrement is found, stop. 3613 * Params: 3614 * e = expression to walk 3615 * defnum = index of eincrement 3616 * v = increment variable 3617 * increment = amount to increment v 3618 * unrolls = number of times loop has been unrolled 3619 */ 3620 3621 private void unrollWalker(elem* e, uint defnum, Symbol* v, targ_llong increment, int unrolls) nothrow 3622 { 3623 int state = 0; 3624 3625 /*********************************** 3626 * Walk e in execution order, fixing it according to state. 3627 * state == 0..unrolls-1: when eincrement is found, remove it, advance to next state 3628 * state == 1..unrolls-1: replacing instances of v with v+(state*increment), 3629 * state == unrolls-1: leave eincrement alone, advance to next state 3630 * state == unrolls: done 3631 */ 3632 3633 void walker(elem *e) 3634 { 3635 assert(e); 3636 const op = e.Eoper; 3637 if (ERTOL(e)) 3638 { 3639 if (e.Edef != defnum) 3640 { 3641 walker(e.EV.E2); 3642 walker(e.EV.E1); 3643 } 3644 } 3645 else if (OTbinary(op)) 3646 { 3647 if (e.Edef != defnum) 3648 { 3649 walker(e.EV.E1); 3650 walker(e.EV.E2); 3651 } 3652 } 3653 else if (OTunary(op)) 3654 { 3655 assert(e.Edef != defnum); 3656 walker(e.EV.E1); 3657 } 3658 else if (op == OPvar && 3659 state && 3660 e.EV.Vsym == v) 3661 { 3662 // overwrite e with (v+increment) 3663 elem *e1 = el_calloc(); 3664 el_copy(e1,e); 3665 e.Eoper = OPadd; 3666 e.EV.E1 = e1; 3667 e.EV.E2 = el_long(e.Ety, increment * state); 3668 } 3669 if (OTdef(op) && e.Edef == defnum) 3670 { 3671 // found the increment elem; neuter all but the last one 3672 if (state + 1 < unrolls) 3673 { 3674 el_free(e.EV.E1); 3675 el_free(e.EV.E2); 3676 e.Eoper = OPconst; 3677 e.EV.Vllong = 0; 3678 } 3679 ++state; 3680 } 3681 } 3682 3683 walker(e); 3684 assert(state == unrolls); 3685 } 3686 3687 3688 /********************************* 3689 * Unroll loop if possible. 3690 * Params: 3691 * l = loop to unroll 3692 * Returns: 3693 * true if loop was unrolled 3694 */ 3695 bool loopunroll(ref loop l) 3696 { 3697 const bool log = false; 3698 if (log) printf("loopunroll(%p)\n", &l); 3699 3700 /* Do not repeatedly unroll the same loop, 3701 * or waste time attempting to 3702 */ 3703 if (l.Lhead.Bflags & BFLnounroll) 3704 return false; 3705 l.Lhead.Bflags |= BFLnounroll; 3706 if (log) WRfunc(); 3707 3708 if (l.Lhead.Btry || l.Ltail.Btry) 3709 return false; 3710 3711 /* For simplification, only unroll loops that consist only 3712 * of a head and tail, and the tail is the exit block. 3713 */ 3714 int numblocks = 0; 3715 for (int i = 0; (i = cast(uint) vec_index(i, l.Lloop)) < dfo.length; ++i) // for each block in loop 3716 ++numblocks; 3717 if (numblocks != 2) 3718 { 3719 if (log) printf("\tnot 2 blocks, but %d\n", numblocks); 3720 return false; 3721 } 3722 assert(l.Lhead != l.Ltail); 3723 3724 /* tail must be the sole exit block 3725 */ 3726 if (vec_testbit(l.Lhead.Bdfoidx, l.Lexit) || 3727 !vec_testbit(l.Ltail.Bdfoidx, l.Lexit)) 3728 { 3729 if (log) printf("\ttail not sole exit block\n"); 3730 return false; 3731 } 3732 3733 elem *ehead = l.Lhead.Belem; 3734 elem *etail = l.Ltail.Belem; 3735 3736 if (log) 3737 { 3738 printf("Unroll candidate:\n"); 3739 printf(" head B%d:\t", l.Lhead.Bdfoidx); WReqn(l.Lhead.Belem); printf("\n"); 3740 printf(" tail B%d:\t", l.Ltail.Bdfoidx); WReqn(l.Ltail.Belem); printf("\n"); 3741 } 3742 3743 /* Tail must be of the form: (v < c) or (v <= c) where v is an unsigned integer 3744 */ 3745 if ((etail.Eoper != OPlt && etail.Eoper != OPle) || 3746 etail.EV.E1.Eoper != OPvar || 3747 etail.EV.E2.Eoper != OPconst) 3748 { 3749 if (log) printf("\tnot (v < c)\n"); 3750 return false; 3751 } 3752 3753 elem *e1 = etail.EV.E1; 3754 elem *e2 = etail.EV.E2; 3755 3756 if (!tyintegral(e1.Ety) || 3757 tysize(e1.Ety) > targ_llong.sizeof || 3758 !(tyuns(e1.Ety) || tyuns(e2.Ety))) 3759 { 3760 if (log) printf("\tnot (integral unsigned)\n"); 3761 return false; 3762 } 3763 3764 int cost = el_length(ehead); 3765 //printf("test4 cost: %d\n", cost); 3766 3767 if (cost > 100) 3768 { 3769 if (log) printf("\tcost %d\n", cost); 3770 return false; 3771 } 3772 if (log) printf("cost %d\n", cost); 3773 3774 Symbol* v = e1.EV.Vsym; 3775 3776 // RD info is only reliable for registers and autos 3777 if (!(sytab[v.Sclass] & SCRD) || !(v.Sflags & SFLunambig)) 3778 { 3779 if (log) printf("\tnot SCRD\n"); 3780 return false; 3781 } 3782 3783 /* Find the initial, increment elem, and final value of s 3784 */ 3785 elem *einitial; 3786 elem *eincrement; 3787 if (!findloopparameters(etail, einitial, eincrement)) 3788 { 3789 if (log) printf("\tnot findloopparameters()\n"); 3790 return false; 3791 } 3792 3793 targ_llong initial = el_tolong(einitial.EV.E2); 3794 targ_llong increment = el_tolong(eincrement.EV.E2); 3795 if (eincrement.Eoper == OPpostdec || eincrement.Eoper == OPminass) 3796 increment = -increment; 3797 targ_llong final_ = el_tolong(e2); 3798 3799 if (log) printf("initial = %lld, increment = %lld, final = %lld\n",cast(long)initial,cast(long)increment,cast(long)final_); 3800 3801 if (etail.Eoper == OPle) 3802 ++final_; 3803 3804 if (initial < 0 || 3805 final_ < initial || 3806 increment <= 0 || 3807 (final_ - initial) % increment) 3808 { 3809 if (log) printf("\tnot (evenly divisible)\n"); 3810 return false; 3811 } 3812 3813 /* If loop would only execute once anyway, just remove the test at the end 3814 */ 3815 if (initial + increment == final_) 3816 { 3817 if (log) printf("\tjust once\n"); 3818 etail.Eoper = OPcomma; 3819 e2.EV.Vllong = 0; 3820 e2.Ety = etail.Ety; 3821 return false; 3822 } 3823 3824 /* number of times the loop is unrolled 3825 */ 3826 targ_ullong numIterations = (final_ - initial) / increment; 3827 const int unrolls = (numIterations < 1000 / cost) 3828 ? cast(int)numIterations 3829 : 2; 3830 3831 if (unrolls == 0 || (final_ - initial) % unrolls) 3832 { 3833 if (log) printf("\tnot (divisible by %d)\n", unrolls); 3834 return false; 3835 } 3836 3837 if (log) printf("Unrolling starting\n"); 3838 3839 // Double the increment 3840 eincrement.EV.E2.EV.Vllong *= unrolls; 3841 //printf(" 4head:\t"); WReqn(l.Lhead.Belem); printf("\n"); 3842 3843 elem* e = null; 3844 foreach (i; 0 .. unrolls) 3845 e = el_combine(e, el_copytree(ehead)); 3846 3847 /* Walk e in execution order. 3848 * When eincrement is found, remove it. 3849 * Continue, replacing instances of `v` with `v+increment` 3850 * When last eincrement is found, stop. 3851 */ 3852 unrollWalker(e, eincrement.Edef, v, increment, unrolls); 3853 3854 l.Lhead.Belem = e; 3855 3856 /* If unrolled loop would only execute once anyway, just remove the test at the end 3857 */ 3858 if (initial + unrolls * increment == final_) 3859 { 3860 if (log) printf("\tcompletely unrolled\n"); 3861 etail.Eoper = OPcomma; 3862 e2.EV.Vllong = 0; 3863 e2.Ety = etail.Ety; 3864 } 3865 3866 //WRfunc(); 3867 return true; 3868 } 3869 3870 /****************************** 3871 * Count number of elems in a tree 3872 * Params: 3873 * e = tree 3874 * Returns: 3875 * number of elems in tree 3876 */ 3877 int el_length(elem *e) 3878 { 3879 int n = 0; 3880 while (e) 3881 { 3882 n += 1; 3883 if (!OTleaf(e.Eoper)) 3884 { 3885 if (e.Eoper == OPctor || e.Eoper == OPdtor) 3886 return 10_000; 3887 n += el_length(e.EV.E2); 3888 e = e.EV.E1; 3889 } 3890 else 3891 break; 3892 } 3893 return n; 3894 } 3895 3896 3897 }