1 /**
2  * Compiler implementation of the
3  * $(LINK2 http://www.dlang.org, D programming language).
4  *
5  * Copyright:   Copyright (C) 1985-1998 by Symantec
6  *              Copyright (C) 2000-2021 by The D Language Foundation, All Rights Reserved
7  * Authors:     $(LINK2 http://www.digitalmars.com, Walter Bright)
8  * License:     $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
9  * Source:      $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/backend/gloop.d, backend/gloop.d)
10  * Coverage:    https://codecov.io/gh/dlang/dmd/src/master/src/dmd/backend/gloop.d
11  */
12 
13 
14 module dmd.backend.gloop;
15 
16 version (SCPP)
17     version = COMPILE;
18 version (MARS)
19     version = COMPILE;
20 
21 version (COMPILE)
22 {
23 
24 import core.stdc.stdio;
25 import core.stdc.stdlib;
26 import core.stdc.string;
27 
28 import dmd.backend.cc;
29 import dmd.backend.cdef;
30 import dmd.backend.code_x86;
31 import dmd.backend.evalu8 : el_toldoubled;
32 import dmd.backend.oper;
33 import dmd.backend.global;
34 import dmd.backend.goh;
35 import dmd.backend.el;
36 import dmd.backend.outbuf;
37 import dmd.backend.symtab;
38 import dmd.backend.ty;
39 import dmd.backend.type;
40 
41 import dmd.backend.barray;
42 import dmd.backend.dlist;
43 import dmd.backend.dvec;
44 import dmd.backend.mem;
45 
46 nothrow:
47 
48 char symbol_isintab(Symbol *s) { return sytab[s.Sclass] & SCSS; }
49 
50 extern (C++):
51 
52 bool findloopparameters(elem* erel, ref elem* rdeq, ref elem* rdinc);
53 
54 alias Loops = Rarray!loop;
55 
56 
57 /*********************************
58  * Loop data structure.
59  */
60 
61 struct loop
62 {
63 nothrow:
64     vec_t Lloop;        // Vector of blocks in this loop
65     vec_t Lexit;        // Vector of exit blocks of loop
66     block *Lhead;       // Pointer to header of loop
67     block *Ltail;       // Pointer to tail
68     block *Lpreheader;  // Pointer to preheader (if any)
69     Barray!(elem*) Llis; // loop invariant elems moved to Lpreheader, so
70                         // redundant temporaries aren't created
71     Rarray!Iv Livlist;        // basic induction variables
72     Rarray!Iv Lopeqlist;      // list of other op= variables
73 
74     /*************************
75      * Reset memory so this allocation can be re-used.
76      */
77     void reset()
78     {
79         vec_free(Lloop);
80         vec_free(Lexit);
81 
82         foreach (ref iv; Livlist)
83             iv.reset();
84         foreach (ref iv; Lopeqlist)
85             iv.reset();
86 
87         Llis.reset();
88         Livlist.reset();
89         Lopeqlist.reset();
90     }
91 
92     /***********************
93      * Write loop.
94      */
95 
96     void print()
97     {
98         debug
99         {
100             loop *l = &this;
101             printf("loop %p\n", l);
102             printf("\thead: B%d, tail: B%d, prehead: B%d\n",l.Lhead.Bdfoidx,
103                 l.Ltail.Bdfoidx,(l.Lpreheader ) ? l.Lpreheader.Bdfoidx
104                                                 : cast(uint)-1);
105             printf("\tLloop "); vec_println(l.Lloop);
106             printf("\tLexit "); vec_println(l.Lexit);
107         }
108     }
109 }
110 
111 struct famlist
112 {
113 nothrow:
114     elem **FLpelem;         /* parent of elem in the family         */
115     elem *c1;
116     elem *c2;               // c1*(basic IV) + c2
117     Symbol *FLtemp;         // symbol index of temporary (FLELIM if */
118                             /* this entry has no temporary)         */
119     tym_t FLty;             /* type of this induction variable      */
120     tym_t FLivty;           /* type of the basic IV elem (which is  */
121                             /* not necessarilly the type of the IV  */
122                             /* elem!)                               */
123 
124     void reset()
125     {
126         el_free(c1);
127         el_free(c2);
128     }
129 
130     void print() const
131     {
132         debug
133         {
134             printf("famlist:\n");
135             printf("*FLpelem:\n");
136             elem_print(*FLpelem);
137             printf("c1:");
138             elem_print(c1);
139             printf("c2:");
140             elem_print(c2);
141             printf("FLty = "); WRTYxx(FLty);
142             printf("\nFLivty = "); WRTYxx(FLivty);
143             printf("\n");
144         }
145     }
146 }
147 
148 enum FLELIM = cast(Symbol *)-1;
149 
150 struct Iv
151 {
152 nothrow:
153     Symbol *IVbasic;        // symbol of basic IV
154     elem **IVincr;          // pointer to parent of IV increment elem
155     Barray!famlist IVfamily;      // variables in this family
156 
157     void reset()
158     {
159         foreach (ref fl; IVfamily)
160         {
161             fl.reset();
162         }
163         IVfamily.reset();
164     }
165 
166     void print() const
167     {
168         debug
169         {
170             printf("IV: '%s'\n",IVbasic.Sident.ptr);
171             printf("*IVincr:\n");
172             elem_print(*IVincr);
173         }
174     }
175 }
176 
177 
178 private __gshared bool addblk;                    /* if TRUE, then we added a block */
179 
180 /* is elem loop invariant?      */
181 int isLI(const elem* n) { return n.Nflags & NFLli; }
182 
183 /* make elem loop invariant     */
184 void makeLI(elem* n) { n.Nflags |= NFLli; }
185 
186 /******************************
187  *      Only variables that could only be unambiguously defined
188  *      are candidates for loop invariant removal and induction
189  *      variables.
190  *      This means only variables that have the SFLunambig flag
191  *      set for them.
192  *      Doing this will still cover 90% (I hope) of the cases, and
193  *      is a lot faster to compute.
194  */
195 
196 /*************
197  * Free loops.
198  */
199 
200 private void freeloop(ref Loops loops)
201 {
202     foreach (ref loop; loops)
203         loop.reset();
204     loops.reset();
205 }
206 
207 
208 /**********************************
209  * Initialize block information.
210  * Returns:
211  *      !=0     contains BCasm block
212  */
213 
214 int blockinit()
215 {
216     bool hasasm = false;
217 
218     assert(dfo);
219     uint i = 0;
220     foreach (b; BlockRange(startblock))
221     {
222         debug                   /* check integrity of Bpred and Bsucc   */
223           L1:
224             foreach (blp; ListRange(b.Bpred))
225             {
226                 foreach (bls; ListRange(list_block(blp).Bsucc))
227                     if (list_block(bls) == b)
228                         continue L1;
229                 assert(0);
230             }
231 
232         ++i;
233         if (b.BC == BCasm)
234             hasasm = true;
235                                         /* compute number of blocks     */
236     }
237     foreach (j, b; dfo[])
238     {
239         assert(b.Bdfoidx == j);
240         b.Bdom = vec_realloc(b.Bdom, dfo.length); /* alloc Bdom vectors */
241         vec_clear(b.Bdom);
242     }
243     return hasasm;
244 }
245 
246 /****************************************
247  * Compute dominators (Bdom) for each block.
248  * See Aho & Ullman Fig. 13.5.
249  * Note that flow graph is reducible if there is only one
250  * pass through the loop.
251  * Input:
252  *      dfo[]
253  * Output:
254  *      fills in the Bdom vector for each block
255  */
256 
257 void compdom()
258 {
259     compdom(dfo[]);
260 }
261 
262 private extern (D) void compdom(block*[] dfo)
263 {
264     assert(dfo.length);
265     block* sb = dfo[0];                  // starting block
266 
267     vec_clear(sb.Bdom);
268     vec_setbit(0,sb.Bdom);               // starting block only doms itself
269     foreach (b; dfo)                     // for all except startblock
270     {
271         if (b != sb)
272             vec_set(b.Bdom);             // dominate all blocks
273     }
274 
275     vec_t t1 = vec_calloc(vec_numbits(sb.Bdom));       // allocate a temporary
276     uint cntr = 0;                       // # of times thru loop
277     bool chgs;
278     do
279     {
280         chgs = false;
281         foreach (i, b; dfo)              // for each block in dfo[]
282         {
283             if (i == 0)
284                 continue;                // except startblock
285             if (b.Bpred)                 // if there are predecessors
286             {
287                 vec_set(t1);
288                 foreach (bl; ListRange(b.Bpred))
289                 {
290                     vec_andass(t1,list_block(bl).Bdom);
291                 }
292             }
293             else
294                 vec_clear(t1);      // no predecessors to dominate
295             vec_setbit(i,t1);       // each block doms itself
296             if (chgs)
297                 vec_copy(b.Bdom,t1);
298             else if (!vec_equal(b.Bdom,t1))   // if any changes
299             {
300                 vec_copy(b.Bdom,t1);
301                 chgs = true;
302             }
303         }
304         cntr++;
305         assert(cntr < 50);              // should have converged by now
306     } while (chgs);
307     vec_free(t1);
308 
309     debug if (debugc)
310     {
311         printf("Flow graph is%s reducible\n", cntr <= 2 ? "".ptr : " not".ptr);
312     }
313 }
314 
315 /***************************
316  * Return !=0 if block A dominates block B.
317  */
318 
319 bool dom(const block* A, const block* B)
320 {
321     assert(A && B && dfo && dfo[A.Bdfoidx] == A);
322     return vec_testbit(A.Bdfoidx,B.Bdom) != 0;
323 }
324 
325 /**********************
326  * Find all the loops.
327  */
328 
329 private extern (D) void findloops(block*[] dfo, ref Loops loops)
330 {
331     freeloop(loops);
332 
333     //printf("findloops()\n");
334     foreach (b; dfo)
335         b.Bweight = 1;             // reset Bweights
336     foreach_reverse (b; dfo)       // for each block (note reverse
337                                    // dfo order, so most nested
338                                    // loops are found first)
339     {
340         assert(b);
341         foreach (bl; ListRange(b.Bsucc))
342         {
343             block *s = list_block(bl);      // each successor s to b
344             assert(s);
345             if (dom(s,b))                   // if s dominates b
346                 buildloop(loops, s, b);     // we found a loop
347         }
348     }
349 
350     debug if (debugc)
351     {
352         foreach (ref l; loops)
353             l.print();
354     }
355 }
356 
357 /********************************
358  */
359 
360 private uint loop_weight(uint weight, int factor) pure
361 {
362     // Be careful not to overflow
363     if (weight < 0x1_0000)
364         weight *= 10 * factor;
365     else if (weight < 0x10_0000)
366         weight *= 2 * factor;
367     else
368         weight += factor;
369     assert(cast(int)weight > 0);
370     return weight;
371 }
372 
373 /*****************************
374  * Construct natural loop.
375  * Algorithm 13.1 from Aho & Ullman.
376  * Note that head dom tail.
377  */
378 
379 private void buildloop(ref Loops ploops,block *head,block *tail)
380 {
381     loop *l;
382 
383     //printf("buildloop()\n");
384     /* See if this is part of an existing loop. If so, merge the two.     */
385     foreach (ref lp; ploops)
386         if (lp.Lhead == head)           /* two loops with same header   */
387         {
388             vec_t v;
389 
390             // Calculate loop contents separately so we get the Bweights
391             // done accurately.
392 
393             v = vec_calloc(dfo.length);
394             vec_setbit(head.Bdfoidx,v);
395             head.Bweight = loop_weight(head.Bweight, 1);
396             insert(tail,v);
397 
398             vec_orass(lp.Lloop,v);      // merge into existing loop
399             vec_free(v);
400 
401             vec_clear(lp.Lexit);        // recompute exit blocks
402             l = &lp;
403             goto L1;
404         }
405 
406     /* Allocate loop entry        */
407     l = ploops.push();
408 
409     l.Lloop = vec_calloc(dfo.length);    // allocate loop bit vector
410     l.Lexit = vec_calloc(dfo.length);    // bit vector for exit blocks
411     l.Lhead = head;
412     l.Ltail = tail;
413     l.Lpreheader = null;
414 
415     vec_setbit(head.Bdfoidx,l.Lloop);    /* add head to the loop         */
416     head.Bweight = loop_weight(head.Bweight, 2);  // *20 usage for loop header
417 
418     insert(tail,l.Lloop);                /* insert tail in loop          */
419 
420 L1:
421     /* Find all the exit blocks (those blocks with
422      * successors outside the loop).
423      */
424 
425     // for each block in this loop
426     for (uint i = 0; (i = cast(uint) vec_index(i, l.Lloop)) < dfo.length; ++i)
427     {
428         if (dfo[i].BC == BCret || dfo[i].BC == BCretexp || dfo[i].BC == BCexit)
429             vec_setbit(i,l.Lexit); /* ret blocks are exit blocks */
430         else
431         {
432             foreach (bl; ListRange(dfo[i].Bsucc))
433                 if (!vec_testbit(list_block(bl).Bdfoidx,l.Lloop))
434                 {
435                     vec_setbit(i,l.Lexit);
436                     break;
437                 }
438         }
439     }
440 
441     /*  Find preheader, if any, to the loop.
442         The preheader is a block that has only the head as a successor.
443         All other predecessors of head must be inside the loop.
444      */
445     l.Lpreheader = null;
446     foreach (bl; ListRange(head.Bpred))
447     {
448         block *b = list_block(bl);
449 
450         if (!vec_testbit(b.Bdfoidx,l.Lloop))  /* if not in loop       */
451         {
452             if (l.Lpreheader)                 /* if already one       */
453             {
454                 l.Lpreheader = null;          /* can only be one      */
455                 break;
456             }
457             else
458             {
459                 if (list_next(b.Bsucc))       // if more than 1 successor
460                     break;                    // b can't be a preheader
461                 l.Lpreheader = b;
462             }
463         }
464     }
465 }
466 
467 /********************************
468  * Support routine for buildloop().
469  * Add a block b and all its predecessors to loop lv.
470  */
471 
472 private void insert(block *b, vec_t lv)
473 {
474     assert(b && lv);
475     if (!vec_testbit(b.Bdfoidx,lv))     /* if block is not in loop      */
476     {
477         vec_setbit(b.Bdfoidx,lv);       /* add block to loop            */
478         b.Bweight = loop_weight(b.Bweight,1);   // *10 usage count
479         foreach (bl; ListRange(b.Bpred))
480             insert(list_block(bl),lv);  /* insert all its predecessors  */
481     }
482 }
483 
484 /**************************************
485  * Perform loop rotations.
486  * Loop starts as:
487  *
488  *         prehead
489  *          |
490  *          v
491  *      +->head---->
492  *      |   |
493  *      |   v
494  *      |  body
495  *      |   |
496  *      |   v
497  *      +--tail
498  *
499  * Two types are done:
500  *      1) Header is moved to be past the tail.
501  *
502  *         prehead
503  *          |
504  *      +---+
505  *      |
506  *      |  body<-+
507  *      |   |    |
508  *      |   v    |
509  *      |  tail  |
510  *      |   |    |
511  *      |   v    |
512  *      +->head--+
513  *          |
514  *          v
515  *
516  *      2) Header is copied past the tail (done only if MFtime is set).
517  *
518  *         prehead
519  *          |
520  *          v
521  *         head1-----+
522  *          |        |
523  *          v        |
524  *         body<--+  |
525  *          |     |  |
526  *          v     |  |
527  *         tail   |  |
528  *          |     |  |
529  *          v     |  |
530  *         head2--+  |
531  *          |        |
532  *          +--------+
533  *          v
534  *
535  * Input:
536  *      Loop information (do not depend on the preheader information)
537  * Output:
538  *      Revised list of blocks, a new dfo and new loop information
539  * Returns:
540  *      true need to recompute loop data
541  */
542 
543 private int looprotate(ref loop l)
544 {
545     block *tail = l.Ltail;
546     block *head = l.Lhead;
547 
548     //printf("looprotate(%p)\n",l);
549 
550     // Do not rotate loop if:
551     if (head == tail ||                         // loop is only one block big
552         !vec_testbit(head.Bdfoidx,l.Lexit))   // header is not an exit block
553         goto Lret;
554 
555     if (//iter != 1 &&
556         vec_testbit(tail.Bdfoidx,l.Lexit))    // tail is an exit block
557         goto Lret;
558 
559     // Do not rotate if already rotated
560     foreach (b; BlockRange(tail.Bnext))
561         if (b == head)                  // if loop already rotated
562             goto Lret;
563 
564     if (head.BC == BCtry)
565          goto Lret;
566     if (head.BC == BC_try)
567          goto Lret;
568 
569     //if (debugc) { printf("looprotate: "); l.print(); }
570 
571     if ((go.mfoptim & MFtime) && head.BC != BCswitch && head.BC != BCasm)
572     {   // Duplicate the header past the tail (but doing
573         // switches would be too expensive in terms of code
574         // generated).
575 
576         auto head2 = block_calloc(); // create new head block
577         head2.Btry = head.Btry;
578         head2.Bflags = head.Bflags;
579         head.Bflags = BFLnomerg;       // move flags over to head2
580         head2.Bflags |= BFLnomerg;
581         head2.BC = head.BC;
582         assert(head2.BC != BCswitch);
583         if (head.Belem)                // copy expression tree
584             head2.Belem = el_copytree(head.Belem);
585         head2.Bnext = tail.Bnext;
586         tail.Bnext = head2;
587 
588         // pred(head1) = pred(head) outside loop
589         // pred(head2) = pred(head) inside loop
590         list_t *pbln;
591         auto pbl2 = &(head2.Bpred);
592         for (list_t *pbl = &(head.Bpred); *pbl; pbl = pbln)
593         {
594             if (vec_testbit(list_block(*pbl).Bdfoidx, l.Lloop))
595             {   // if this predecessor is inside the loop
596 
597                 *pbl2 = *pbl;
598                 *pbl = list_next(*pbl);
599                 pbln = pbl;                     // don't skip this next one
600                 (*pbl2).next = null;
601                 auto bsucc = list_block(*pbl2).Bsucc;
602                 pbl2 = &((*pbl2).next);
603                 foreach (bl; ListRange(bsucc))
604                     if (list_block(bl) == head)
605                     {
606                         bl.ptr = cast(void *)head2;
607                         goto L2;
608                     }
609                 assert(0);
610         L2:
611             }
612             else
613                 pbln = &((*pbl).next);      // next predecessor in list
614         } // for each pred(head)
615 
616         // succ(head2) = succ(head)
617         foreach (bl; ListRange(head.Bsucc))
618         {
619             list_append(&(head2.Bsucc),list_block(bl));
620             list_append(&(list_block(bl).Bpred),head2);
621         }
622         if (debugc) printf("1Rotated loop %p\n", &l);
623         go.changes++;
624         return true;
625     }
626     else if (startblock != head
627             /* This screws up the OPctor/OPdtor sequence for:
628              *   struct CString
629              *   {   CString();
630              *      ~CString();
631              *      int GetLength();
632              *   };
633              *
634              *   void f(void)
635              *   {  for(;;)
636              *      {   CString s ;
637              *    if(s.GetLength()!=0)
638              *       break ;
639              *      }
640              *   }
641              */
642             && !(config.flags3 & CFG3eh)
643             )
644     {   // optimize for space
645         // Simply position the header past the tail
646         foreach (b; BlockRange(startblock))
647         {
648             if (b.Bnext == head)
649             {   // found parent b of head
650                 b.Bnext = head.Bnext;
651                 head.Bnext = tail.Bnext;
652                 tail.Bnext = head;
653                 if (debugc) printf("2Rotated loop %p\n", &l);
654                 go.changes++;
655                 return false;
656             }
657         }
658         assert(0);
659     }
660 Lret:
661     return false;
662 }
663 
664 private __gshared
665 {
666     int gref;                // parameter for markinvar()
667     block *gblock;           // parameter for markinvar()
668     vec_t lv;                // parameter for markinvar()
669     vec_t gin;               // parameter for markinvar()
670     bool doflow;             // true if flow analysis has to be redone
671 }
672 
673 /*********************************
674  * Loop invariant and induction variable elimination.
675  * Input:
676  *      iter    which optimization iteration we are on
677  */
678 
679 void loopopt()
680 {
681     __gshared Loops startloop_cache;
682 
683     Loops startloop = startloop_cache;
684 
685     if (debugc) printf("loopopt()\n");
686 restart:
687     file_progress();
688     if (blockinit())                    // init block data
689     {
690         findloops(dfo[], startloop);    // Compute Bweights
691         freeloop(startloop);            // free existing loops
692         startloop_cache = startloop;
693         return;                         // can't handle ASM blocks
694     }
695     compdom();                          // compute dominators
696     findloops(dfo[], startloop);              // find the loops
697 
698   L3:
699     while (1)
700     {
701         foreach (ref l; startloop)
702         {
703             if (looprotate(l))              // rotate the loop
704             {
705                 compdfo();
706                 blockinit();
707                 compdom();
708                 findloops(dfo[], startloop);
709                 continue L3;
710             }
711         }
712         break;
713     }
714 
715     // Make sure there is a preheader for each loop.
716 
717     addblk = false;                     /* assume no blocks added        */
718     foreach (ref l; startloop)
719     {
720         //if (debugc) l.print();
721 
722         if (!l.Lpreheader)             /* if no preheader               */
723         {
724             block *h;
725             block *p;
726 
727             if (debugc) printf("Generating preheader for loop\n");
728             addblk = true;              /* add one                       */
729             p = block_calloc();         // the preheader
730             h = l.Lhead;               /* loop header                   */
731 
732             /* Find parent of h */
733             if (h == startblock)
734                 startblock = p;
735             else
736             {
737                 for (auto ph = startblock; 1; ph = ph.Bnext)
738                 {
739                     assert(ph);         /* should have found it         */
740                     if (ph.Bnext == h)
741                     {
742                         // Link p into block list between ph and h
743                         ph.Bnext = p;
744                         break;
745                     }
746                 }
747             }
748             p.Bnext = h;
749 
750             l.Lpreheader = p;
751             p.BC = BCgoto;
752             assert(p.Bsucc == null);
753             list_append(&(p.Bsucc),h); /* only successor is h          */
754             p.Btry = h.Btry;
755 
756             if (debugc) printf("Adding preheader %p to loop %p\n",p,&l);
757 
758             // Move preds of h that aren't in the loop to preds of p
759             for (list_t bl = h.Bpred; bl;)
760             {
761                 block *b = list_block(bl);
762 
763                 if (!vec_testbit (b.Bdfoidx, l.Lloop))
764                 {
765                     list_append(&(p.Bpred), b);
766                     list_subtract(&(h.Bpred), b);
767                     bl = h.Bpred;      /* dunno what subtract did      */
768 
769                     /* Fix up successors of predecessors        */
770                     foreach (bls; ListRange(b.Bsucc))
771                         if (list_block(bls) == h)
772                                 bls.ptr = cast(void *)p;
773                 }
774                 else
775                     bl = list_next(bl);
776             }
777             list_append(&(h.Bpred),p); /* p is a predecessor to h      */
778         }
779     } /* for */
780     if (addblk)                         /* if any blocks were added      */
781     {
782         compdfo();                      /* compute depth-first order    */
783         blockinit();
784         compdom();
785         findloops(dfo[], startloop);          // recompute block info
786         addblk = false;
787     }
788 
789     /* Do the loop optimizations.
790      */
791 
792     doflow = true;                      /* do flow analysis             */
793 
794     if (go.mfoptim & MFtime)
795     {
796         if (debugc) printf("Starting loop unrolling\n");
797     L2:
798         while (1)
799         {
800             foreach (ref l; startloop)
801             {
802                 if (loopunroll(l))
803                 {
804                     compdfo();                      // compute depth-first order
805                     blockinit();
806                     compdom();
807                     findloops(dfo[], startloop);    // recompute block info
808                     doflow = true;
809                     continue L2;
810                 }
811             }
812             break;
813         }
814     }
815 
816     /* Note that accessing the loops
817      * starting from startloop will access them in least nested
818      * one first, thus moving LIs out as far as possible
819      */
820     if (debugc) printf("Starting loop invariants\n");
821 
822     foreach_reverse (ref l; startloop)
823     {
824         //if (debugc) l.print();
825 
826         file_progress();
827         assert(l.Lpreheader);
828         if (doflow)
829         {
830             flowrd();               /* compute reaching definitions  */
831             flowlv();               /* compute live variables        */
832             flowae();               // compute available expressions
833             doflow = false;         /* no need to redo it           */
834             if (go.defnod.length == 0)     /* if no definition elems       */
835                 break;              /* no need to optimize          */
836         }
837         lv = l.Lloop;
838         if (debugc) printf("...Loop %p start...\n",&l);
839 
840         /* Unmark all elems in this loop         */
841         for (uint i = 0; (i = cast(uint) vec_index(i, lv)) < dfo.length; ++i)
842             if (dfo[i].Belem)
843                 unmarkall(dfo[i].Belem);       /* unmark all elems     */
844 
845         /* Find & mark all LIs   */
846         gin = vec_clone(l.Lpreheader.Bout);
847         vec_t rd = vec_calloc(go.defnod.length);        /* allocate our running RD vector */
848         for (uint i = 0; (i = cast(uint) vec_index(i, lv)) < dfo.length; ++i) // for each block in loop
849         {
850             block *b = dfo[i];
851 
852             if (debugc) printf("B%d\n",i);
853             if (b.Belem)
854             {
855                 vec_copy(rd, b.Binrd); // IN reaching defs
856                 static if (0)
857                 {
858                     printf("i = %d\n",i);
859                     {
860                         for (int j = 0; j < go.defnod.length; j++)
861                             elem_print(go.defnod[j].DNelem);
862                     }
863                     printf("rd    : "); vec_println(rd);
864                 }
865                 gblock = b;
866                 gref = 0;
867                 if (b != l.Lhead)
868                     gref = 1;
869                 markinvar(b.Belem, rd);
870                 static if (0)
871                 {
872                     printf("B%d\n", i);
873                     {
874                         foreach (j; 0 .. go.defnod.length)
875                         {
876                             printf("  [%2d] ", j);
877                             WReqn(go.defnod[j].DNelem);
878                             printf("\n");
879                         }
880                     }
881                     printf("rd    : "); vec_println(rd);
882                     printf("Boutrd: "); vec_println(b.Boutrd);
883                 }
884                 assert(vec_equal(rd, b.Boutrd));
885             }
886             else
887                 assert(vec_equal(b.Binrd, b.Boutrd));
888         }
889         vec_free(rd);
890         vec_free(gin);
891 
892         /* Move loop invariants  */
893         for (uint i = 0; (i = cast(uint) vec_index(i, lv)) < dfo.length; ++i)
894         {
895             uint domexit;               // true if this block dominates all
896                                         // exit blocks of the loop
897 
898             for (uint j = 0; (j = cast(uint) vec_index(j, l.Lexit)) < dfo.length; ++j) // for each exit block
899             {
900                 if (!vec_testbit (i, dfo[j].Bdom))
901                 {
902                     domexit = 0;
903                     goto L1;                // break if !(i dom j)
904                 }
905             }
906             // if i dom (all exit blocks)
907             domexit = 1;
908         L1:
909             if (dfo[i].Belem)
910             {   // If there is any hope of making an improvement
911                 if (domexit || l.Llis.length)
912                 {
913                     //if (dfo[i] != l.Lhead)
914                         //domexit |= 2;
915                     movelis(dfo[i].Belem, dfo[i], l, domexit);
916                 }
917             }
918         }
919         if (debugc) printf("...Loop %p done...\n",&l);
920 
921         if (go.mfoptim & MFliv)
922         {
923             loopiv(l);              /* induction variables          */
924             if (addblk)             /* if we added a block          */
925             {
926                 compdfo();
927                 goto restart;       /* play it safe and start over  */
928             }
929         }
930     } /* for */
931     freeloop(startloop);
932     startloop_cache = startloop;
933 }
934 
935 /*****************************
936  * If elem is loop invariant, mark it.
937  * Input:
938  *      lv =    vector of all the blocks in this loop.
939  *      rd =    vector of loop invariants for this elem. This must be
940  *              continually updated.
941  * Note that we do not iterate until no more LIs are found. The only
942  * thing this would buy us is stuff that depends on LI assignments.
943  */
944 
945 private void markinvar(elem *n,vec_t rd)
946 {
947     vec_t tmp;
948     uint i;
949     Symbol *v;
950     elem *n1;
951 
952     assert(n && rd);
953     assert(vec_numbits(rd) == go.defnod.length);
954     switch (n.Eoper)
955     {
956         case OPaddass:  case OPminass:  case OPmulass:  case OPandass:
957         case OPorass:   case OPxorass:  case OPdivass:  case OPmodass:
958         case OPshlass:  case OPshrass:  case OPashrass:
959         case OPpostinc: case OPpostdec:
960         case OPcall:
961         case OPvecsto:
962         case OPcmpxchg:
963             markinvar(n.EV.E2,rd);
964             goto case OPnegass;
965 
966         case OPnegass:
967             n1 = n.EV.E1;
968             if (n1.Eoper == OPind)
969                     markinvar(n1.EV.E1,rd);
970             else if (OTbinary(n1.Eoper))
971             {   markinvar(n1.EV.E1,rd);
972                 markinvar(n1.EV.E2,rd);
973             }
974         L2:
975             if (n.Eoper == OPcall ||
976                 gblock.Btry ||
977                 !(n1.Eoper == OPvar &&
978                     symbol_isintab(n1.EV.Vsym)))
979             {
980                 gref = 1;
981             }
982 
983             updaterd(n,rd,null);
984             break;
985 
986         case OPcallns:
987             markinvar(n.EV.E2,rd);
988             markinvar(n.EV.E1,rd);
989             break;
990 
991         case OPstrcpy:
992         case OPstrcat:
993         case OPmemcpy:
994         case OPmemset:
995             markinvar(n.EV.E2,rd);
996             markinvar(n.EV.E1,rd);
997             updaterd(n,rd,null);
998             break;
999 
1000         case OPbtc:
1001         case OPbtr:
1002         case OPbts:
1003             markinvar(n.EV.E1,rd);
1004             markinvar(n.EV.E2,rd);
1005             updaterd(n,rd,null);
1006             break;
1007 
1008         case OPucall:
1009             markinvar(n.EV.E1,rd);
1010             goto case OPasm;
1011 
1012         case OPasm:
1013             gref = 1;
1014             updaterd(n,rd,null);
1015             break;
1016 
1017         case OPucallns:
1018         case OPstrpar:
1019         case OPstrctor:
1020         case OPvector:
1021         case OPvoid:
1022         case OPstrlen:
1023         case OPddtor:
1024         case OPinp:
1025         case OPprefetch:                // don't mark E2
1026             markinvar(n.EV.E1,rd);
1027             break;
1028 
1029         case OPcond:
1030         case OPparam:
1031         case OPstrcmp:
1032         case OPmemcmp:
1033         case OPbt:                      // OPbt is like OPind, assume not LI
1034         case OPoutp:
1035             markinvar(n.EV.E1,rd);
1036             markinvar(n.EV.E2,rd);
1037             break;
1038 
1039         case OPandand:
1040         case OPoror:
1041             markinvar(n.EV.E1,rd);
1042             tmp = vec_clone(rd);
1043             markinvar(n.EV.E2,tmp);
1044             if (el_returns(n.EV.E2))
1045                 vec_orass(rd,tmp);              // rd |= tmp
1046             vec_free(tmp);
1047             break;
1048 
1049         case OPcolon:
1050         case OPcolon2:
1051             tmp = vec_clone(rd);
1052             switch (el_returns(n.EV.E1) * 2 | int(el_returns(n.EV.E2)))
1053             {
1054                 case 3: // E1 and E2 return
1055                     markinvar(n.EV.E1,rd);
1056                     markinvar(n.EV.E2,tmp);
1057                     vec_orass(rd,tmp);              // rd |= tmp
1058                     break;
1059                 case 2: // E1 returns
1060                     markinvar(n.EV.E1,rd);
1061                     markinvar(n.EV.E2,tmp);
1062                     break;
1063                 case 1: // E2 returns
1064                     markinvar(n.EV.E1,tmp);
1065                     markinvar(n.EV.E2,rd);
1066                     break;
1067                 case 0: // neither returns
1068                     markinvar(n.EV.E1,tmp);
1069                     vec_copy(tmp,rd);
1070                     markinvar(n.EV.E2,tmp);
1071                     break;
1072                 default:
1073                     assert(0);
1074             }
1075             vec_free(tmp);
1076             break;
1077 
1078         case OPaddr:            // mark addresses of OPvars as LI
1079             markinvar(n.EV.E1,rd);
1080             if (n.EV.E1.Eoper == OPvar || isLI(n.EV.E1))
1081                 makeLI(n);
1082             break;
1083 
1084         case OPmsw:
1085         case OPneg:     case OPbool:    case OPnot:     case OPcom:
1086         case OPs16_32:  case OPd_s32:   case OPs32_d:
1087         case OPd_s16:   case OPs16_d:   case OPd_f:     case OPf_d:
1088         case OP32_16:   case OPu8_16:
1089         case OPld_d:    case OPd_ld:
1090         case OPld_u64:
1091         case OPc_r:     case OPc_i:
1092         case OPu16_32:
1093         case OPu16_d:   case OPd_u16:
1094         case OPs8_16:   case OP16_8:
1095         case OPd_u32:   case OPu32_d:
1096 
1097         case OPs32_64:  case OPu32_64:
1098         case OP64_32:
1099         case OPd_s64:   case OPd_u64:
1100         case OPs64_d:
1101         case OPu64_d:
1102         case OP128_64:
1103         case OPs64_128:
1104         case OPu64_128:
1105 
1106         case OPabs:
1107         case OPtoprec:
1108         case OPrndtol:
1109         case OPrint:
1110         case OPsetjmp:
1111         case OPbsf:
1112         case OPbsr:
1113         case OPbswap:
1114         case OPpopcnt:
1115         case OPsqrt:
1116         case OPsin:
1117         case OPcos:
1118         case OPvp_fp: /* BUG for MacHandles */
1119         case OPnp_f16p: case OPf16p_np: case OPoffset: case OPnp_fp:
1120         case OPcvp_fp:
1121         case OPvecfill:
1122             markinvar(n.EV.E1,rd);
1123             if (isLI(n.EV.E1))        /* if child is LI               */
1124                 makeLI(n);
1125             break;
1126 
1127         case OPeq:
1128         case OPstreq:
1129             markinvar(n.EV.E2,rd);
1130             n1 = n.EV.E1;
1131             markinvar(n1,rd);
1132 
1133             /* Determine if assignment is LI. Conditions are:       */
1134             /* 1) Rvalue is LI                                      */
1135             /* 2) Lvalue is a variable (simplifies things a lot)    */
1136             /* 3) Lvalue can only be affected by unambiguous defs   */
1137             /* 4) No rd's of lvalue that are within the loop (other */
1138             /*    than the current def)                             */
1139             if (isLI(n.EV.E2) && n1.Eoper == OPvar)          /* 1 & 2 */
1140             {
1141                 v = n1.EV.Vsym;
1142                 if (v.Sflags & SFLunambig)
1143                 {
1144                     tmp = vec_calloc(go.defnod.length);
1145                     //filterrd(tmp,rd,v);
1146                     listrds(rd,n1,tmp,null);
1147                     for (i = 0; (i = cast(uint) vec_index(i, tmp)) < go.defnod.length; ++i)
1148                         if (go.defnod[i].DNelem != n &&
1149                             vec_testbit(go.defnod[i].DNblock.Bdfoidx,lv))
1150                                 goto L3;
1151                     makeLI(n);      // then the def is LI
1152                 L3: vec_free(tmp);
1153                 }
1154             }
1155             goto L2;
1156 
1157         case OPadd:     case OPmin:     case OPmul:     case OPand:
1158         case OPor:      case OPxor:     case OPdiv:     case OPmod:
1159         case OPshl:     case OPshr:     case OPeqeq:    case OPne:
1160         case OPlt:      case OPle:      case OPgt:      case OPge:
1161         case OPashr:
1162         case OPror:     case OProl:
1163         case OPbtst:
1164 
1165         case OPunord:   case OPlg:      case OPleg:     case OPule:
1166         case OPul:      case OPuge:     case OPug:      case OPue:
1167         case OPngt:     case OPnge:     case OPnlt:     case OPnle:
1168         case OPord:     case OPnlg:     case OPnleg:    case OPnule:
1169         case OPnul:     case OPnuge:    case OPnug:     case OPnue:
1170 
1171         case OPcomma:
1172         case OPpair:
1173         case OPrpair:
1174         case OPremquo:
1175         case OPscale:
1176         case OPyl2x:
1177         case OPyl2xp1:
1178             markinvar(n.EV.E1,rd);
1179             markinvar(n.EV.E2,rd);
1180             if (isLI(n.EV.E2) && isLI(n.EV.E1))
1181                     makeLI(n);
1182             break;
1183 
1184         case OPind:                     /* must assume this is not LI   */
1185             markinvar(n.EV.E1,rd);
1186             if (isLI(n.EV.E1))
1187             {
1188                 static if (0)
1189                 {
1190                     // This doesn't work with C++, because exp2_ptrtocomtype() will
1191                     // transfer const to where it doesn't belong.
1192                     if (n.Ety & mTYconst)
1193                     {
1194                         makeLI(n);
1195                     }
1196 
1197                     // This was disabled because it was marking as LI
1198                     // the loop dimension for the [i] array if
1199                     // a[j][i] was in a loop. This meant the a[j] array bounds
1200                     // check for the a[j].length was skipped.
1201                     else if (n.Ejty)
1202                     {
1203                         tmp = vec_calloc(go.defnod.length);
1204                         filterrdind(tmp,rd,n);  // only the RDs pertaining to n
1205 
1206                         // if (no RDs within loop)
1207                         //      then it's loop invariant
1208 
1209                         for (i = 0; (i = cast(uint) vec_index(i, tmp)) < go.defnod.length; ++i)  // for each RD
1210                             if (vec_testbit(go.defnod[i].DNblock.Bdfoidx,lv))
1211                                 goto L10;       // found a RD in the loop
1212 
1213                         // If gref has occurred, this can still be LI
1214                         // if n is an AE that was also an AE at the
1215                         // point of gref.
1216                         // We can catch a subset of these cases by looking
1217                         // at the AEs at the start of the loop.
1218                         if (gref)
1219                         {
1220                             int j;
1221 
1222                             //printf("\tn is: "); WReqn(n); printf("\n");
1223                             for (j = 0; (j = cast(uint) vec_index(j, gin)) < go.exptop; ++j)
1224                             {
1225                                 elem *e = go.expnod[j];
1226 
1227                                 //printf("\t\tgo.expnod[%d] = %p\n",j,e);
1228                                 //printf("\t\tAE is: "); WReqn(e); printf("\n");
1229                                 if (el_match2(n,e))
1230                                 {
1231                                     makeLI(n);
1232                                     //printf("Ind LI: "); WReqn(n); printf("\n");
1233                                     break;
1234                                 }
1235                             }
1236                         }
1237                         else
1238                             makeLI(n);
1239                   L10:
1240                         vec_free(tmp);
1241                         break;
1242                     }
1243                 }
1244             }
1245             break;
1246 
1247         case OPvar:
1248             v = n.EV.Vsym;
1249             if (v.Sflags & SFLunambig)     // must be unambiguous to be LI
1250             {
1251                 tmp = vec_calloc(go.defnod.length);
1252                 //filterrd(tmp,rd,v);       // only the RDs pertaining to v
1253                 listrds(rd,n,tmp,null);  // only the RDs pertaining to v
1254 
1255                 // if (no RDs within loop)
1256                 //  then it's loop invariant
1257 
1258                 for (i = 0; (i = cast(uint) vec_index(i, tmp)) < go.defnod.length; ++i)  // for each RD
1259                     if (vec_testbit(go.defnod[i].DNblock.Bdfoidx,lv))
1260                         goto L1;    // found a RD in the loop
1261                 makeLI(n);
1262 
1263             L1: vec_free(tmp);
1264             }
1265             break;
1266 
1267         case OPstring:
1268         case OPrelconst:
1269         case OPconst:                   /* constants are always LI      */
1270         case OPframeptr:
1271             makeLI(n);
1272             break;
1273 
1274         case OPinfo:
1275             markinvar(n.EV.E2,rd);
1276             break;
1277 
1278         case OPstrthis:
1279         case OPmark:
1280         case OPctor:
1281         case OPdtor:
1282         case OPdctor:
1283         case OPhalt:
1284         case OPgot:                     // shouldn't OPgot be makeLI ?
1285             break;
1286 
1287         default:
1288             WROP(n.Eoper);
1289             //printf("n.Eoper = %d, OPconst = %d\n", n.Eoper, OPconst);
1290             assert(0);
1291     }
1292 
1293     debug
1294     {
1295         if (debugc && isLI(n))
1296         {
1297             printf("  LI elem: ");
1298             WReqn(n);
1299             printf("\n");
1300         }
1301     }
1302 }
1303 
1304 /**************************************
1305  * Fill in the DefNode.DNumambig vector.
1306  * Set bits defnod[] indices for entries
1307  * which are completely destroyed when e is
1308  * unambiguously assigned to.
1309  * Params:
1310  *      v = vector to fill in
1311  *      e = defnod[] entry that is an assignment to a variable
1312  */
1313 
1314 extern (C)
1315 {
1316 void fillInDNunambig(vec_t v, elem *e)
1317 {
1318     assert(OTassign(e.Eoper));
1319     elem *t = e.EV.E1;
1320     assert(t.Eoper == OPvar);
1321     Symbol *d = t.EV.Vsym;
1322 
1323     targ_size_t toff = t.EV.Voffset;
1324     targ_size_t tsize = (e.Eoper == OPstreq) ? type_size(e.ET) : tysize(t.Ety);
1325     targ_size_t ttop = toff + tsize;
1326 
1327     //printf("updaterd: "); WReqn(n); printf(" toff=%d, tsize=%d\n", toff, tsize);
1328 
1329 
1330     // for all unambig defs in go.defnod[]
1331     foreach (const i; 0 .. go.defnod.length)
1332     {
1333         elem *tn = go.defnod[i].DNelem;
1334         elem *tn1;
1335         targ_size_t tn1size;
1336 
1337         if (!OTassign(tn.Eoper))
1338             continue;
1339 
1340         // If def of same variable, kill that def
1341         tn1 = tn.EV.E1;
1342         if (tn1.Eoper != OPvar || d != tn1.EV.Vsym)
1343             continue;
1344 
1345         // If t completely overlaps tn1
1346         tn1size = (tn.Eoper == OPstreq)
1347             ? type_size(tn.ET) : tysize(tn1.Ety);
1348         if (toff <= tn1.EV.Voffset &&
1349             tn1.EV.Voffset + tn1size <= ttop)
1350         {
1351             vec_setbit(cast(uint)i, v);
1352         }
1353     }
1354 }
1355 }
1356 
1357 /********************
1358  * Update rd vector.
1359  * Input:
1360  *      n       assignment elem or function call elem or OPasm elem
1361  *      rd      reaching def vector to update
1362  *              (clear bits for defs we kill, set bit for n (which is the
1363  *               def we are genning))
1364  *      vecdim  go.defnod.length
1365  */
1366 
1367 extern (C) {
1368 void updaterd(elem *n,vec_t GEN,vec_t KILL)
1369 {
1370     const op = n.Eoper;
1371     elem *t;
1372 
1373     assert(OTdef(op));
1374     assert(GEN);
1375     elem_debug(n);
1376 
1377     uint ni = n.Edef;
1378     assert(ni != -1);
1379 
1380     // If unambiguous def
1381     if (OTassign(op) && (t = n.EV.E1).Eoper == OPvar)
1382     {
1383         vec_t v = go.defnod[ni].DNunambig;
1384         assert(v);
1385         if (KILL)
1386             vec_orass(KILL, v);
1387         vec_subass(GEN, v);
1388     }
1389     else
1390     {
1391         static if (0)
1392         {
1393             if (OTassign(op) && t.Eoper != OPvar && t.Ejty)
1394             {
1395                 // for all unambig defs in go.defnod[]
1396                 foreach (uint i; 0 .. go.defnod.length)
1397                 {
1398                     elem *tn = go.defnod[i].DNelem;
1399                     elem *tn1;
1400 
1401                     if (tn == n)
1402                         ni = i;
1403 
1404                     if (!OTassign(tn.Eoper))
1405                         continue;
1406 
1407                     // If def of same variable, kill that def
1408                     tn1 = tn.EV.E1;
1409                     if (tn1.Eoper != OPind || t.Ejty != tn1.Ejty)
1410                         continue;
1411 
1412                     if (KILL)
1413                         vec_setbit(i,KILL);
1414                     vec_clearbit(i,GEN);
1415                 }
1416             }
1417         }
1418     }
1419 
1420     vec_setbit(ni,GEN);                 // set bit in GEN for this def
1421 }
1422 }
1423 
1424 /***************************
1425  * Mark all elems as not being loop invariant.
1426  */
1427 
1428 private void unmarkall(elem *e)
1429 {
1430     for (; 1; e = e.EV.E1)
1431     {
1432         assert(e);
1433         e.Nflags &= ~NFLli;            /* unmark this elem             */
1434         if (OTunary(e.Eoper))
1435             continue;
1436         else if (OTbinary(e.Eoper))
1437         {
1438             unmarkall(e.EV.E2);
1439             continue;
1440         }
1441         return;
1442     }
1443 }
1444 
1445 
1446 /********************************
1447  * Search for references to v in tree n before nstop is encountered.
1448  * Params:
1449  *      v = symbol to search for
1450  *      n = tree to search
1451  *      nstop = stop searching tree when reaching this elem
1452  * Returns:
1453  *    true if there are any refs of v in n before nstop is encountered
1454  */
1455 
1456 private bool refs(Symbol *v,elem *n,elem *nstop)
1457 {
1458     symbol_debug(v);
1459     assert(symbol_isintab(v));
1460     assert(v.Ssymnum < globsym.length);
1461     bool stop = false;
1462 
1463     // Walk tree in evaluation order
1464     bool walk(elem* n)
1465     {
1466         elem_debug(n);
1467         assert(n);
1468 
1469         if (stop)
1470             return false;
1471         bool f = false;
1472         const op = n.Eoper;
1473         if (OTunary(op))
1474             f = walk(n.EV.E1);
1475         else if (OTbinary(op))
1476         {
1477             if (ERTOL(n))                   /* watch order of evaluation    */
1478             {
1479                 /* Note that (OPvar = e) is not a ref of OPvar, whereas     */
1480                 /* ((OPbit OPvar) = e) is a ref of OPvar, and (OPvar op= e) is */
1481                 /* a ref of OPvar, etc.                                     */
1482                 f = walk(n.EV.E2);
1483                 if (!f)
1484                 {
1485                     if (op == OPeq)
1486                     {
1487                         if (n.EV.E1.Eoper != OPvar)
1488                             f = walk(n.EV.E1.EV.E1);
1489                     }
1490                     else
1491                         f = walk(n.EV.E1);
1492                 }
1493             }
1494             else
1495                 f = walk(n.EV.E1) || walk(n.EV.E2);
1496         }
1497 
1498         if (n == nstop)
1499             stop = true;
1500         else if (n.Eoper == OPvar)           /* if variable reference        */
1501             return v == n.EV.Vsym;
1502         else if (op == OPasm)                /* everything is referenced     */
1503             return true;
1504         return f;
1505     }
1506 
1507     return walk(n);
1508 }
1509 
1510 /*************************
1511  * Move LIs to preheader.
1512  * Conditions to be satisfied for code motion are:
1513  *      1) All exit blocks are dominated (true before this is called).
1514  *                      -- OR --
1515  *      2) Variable assigned by a statement is not live on entering
1516  *         any successor outside the loop of any exit block of the
1517  *         loop.
1518  *
1519  *      3) Cannot move assignment to variable if there are any other
1520  *         assignments to that variable within the loop (true or
1521  *         assignment would not have been marked LI).
1522  *      4) Cannot move assignments to a variable if there is a use
1523  *         of that variable in this loop that is reached by any other
1524  *         def of it.
1525  *      5) Cannot move expressions that have side effects.
1526  *      6) Do not move assignments to variables that could be affected
1527  *         by ambiguous defs.
1528  *      7) It is not worth it to move expressions of the form:
1529  *              (var == const)
1530  * Input:
1531  *      n       the elem we're considering moving
1532  *      b       the block this elem is in
1533  *      l       the loop we're in
1534  *      domexit flags
1535  *      bit 0:  1       this branch is always executed
1536  *              0       this branch is only sometimes executed
1537  *      bit 1:  1       do not move LIs that could throw exceptions
1538  *                      or cannot be moved past possibly thrown exceptions
1539  * Returns:
1540  *      revised domexit
1541  */
1542 
1543 private void movelis(elem* n, block* b, ref loop l, ref uint pdomexit)
1544 {
1545     vec_t tmp;
1546     elem *ne;
1547     elem *t;
1548     elem *n2;
1549     Symbol *v;
1550     tym_t ty;
1551 
1552 Lnextlis:
1553     //if (isLI(n)) { printf("movelis(B%d, ", b.Bdfoidx); WReqn(n); printf(")\n"); }
1554     assert(n);
1555     elem_debug(n);
1556     const op = n.Eoper;
1557     switch (op)
1558     {
1559         case OPvar:
1560         case OPconst:
1561         case OPrelconst:
1562             goto Lret;
1563 
1564         case OPandand:
1565         case OPoror:
1566         case OPcond:
1567         {
1568             uint domexit;
1569 
1570             movelis(n.EV.E1,b,l,pdomexit);        // always executed
1571             domexit = pdomexit & ~1;   // sometimes executed
1572             movelis(n.EV.E2,b,l,domexit);
1573             pdomexit |= domexit & 2;
1574             goto Lret;
1575         }
1576 
1577         case OPeq:
1578             // Do loop invariant assignments
1579             if (isLI(n) && n.EV.E1.Eoper == OPvar)
1580             {
1581                 v = n.EV.E1.EV.Vsym;          // variable index number
1582 
1583                 if (!(v.Sflags & SFLunambig)) goto L3;         // case 6
1584 
1585                 // If case 4 is not satisfied, return
1586 
1587                 // Function parameters have an implied definition prior to the
1588                 // first block of the function. Unfortunately, the rd vector
1589                 // does not take this into account. Therefore, we assume the
1590                 // worst and reject assignments to function parameters.
1591                 if (v.Sclass == SCparameter || v.Sclass == SCregpar ||
1592                     v.Sclass == SCfastpar || v.Sclass == SCshadowreg)
1593                         goto L3;
1594 
1595                 if (el_sideeffect(n.EV.E2)) goto L3;              // case 5
1596 
1597                 // If case 1 or case 2 is not satisfied, return
1598 
1599                 if (!(pdomexit & 1))                   // if not case 1
1600                 {
1601                     uint i;
1602                     for (i = 0; (i = cast(uint) vec_index(i, l.Lexit)) < dfo.length; ++i)  // for each exit block
1603                     {
1604                         foreach (bl; ListRange(dfo[i].Bsucc))
1605                         {
1606                             block *s;           // successor to exit block
1607 
1608                             s = list_block(bl);
1609                             if (!vec_testbit(s.Bdfoidx,l.Lloop) &&
1610                                 (!symbol_isintab(v) ||
1611                                  vec_testbit(v.Ssymnum,s.Binlv))) // if v is live on exit
1612                                     goto L3;
1613                         }
1614                     }
1615                 }
1616 
1617                 tmp = vec_calloc(go.defnod.length);
1618                 uint i;
1619                 for (i = 0; (i = cast(uint) vec_index(i, l.Lloop)) < dfo.length; ++i)  // for each block in loop
1620                 {
1621                     if (dfo[i] == b)        // except this one
1622                         continue;
1623 
1624                     //<if there are any RDs of v in Binrd other than n>
1625                     //    <if there are any refs of v in that block>
1626                     //        return;
1627 
1628                     //filterrd(tmp,dfo[i].Binrd,v);
1629                     listrds(dfo[i].Binrd,n.EV.E1,tmp,null);
1630                     uint j;
1631                     for (j = 0; (j = cast(uint) vec_index(j, tmp)) < go.defnod.length; ++j)  // for each RD of v in Binrd
1632                     {
1633                         if (go.defnod[j].DNelem == n)
1634                             continue;
1635                         if (dfo[i].Belem &&
1636                             refs(v,dfo[i].Belem,cast(elem *)null)) //if refs of v
1637                         {
1638                             vec_free(tmp);
1639                             goto L3;
1640                         }
1641                         break;
1642                     }
1643                 } // foreach
1644 
1645                 // <if there are any RDs of v in b.Binrd other than n>
1646                 //     <if there are any references to v before the
1647                 //      assignment to v>
1648                 //         <can't move this assignment>
1649 
1650                 //filterrd(tmp,b.Binrd,v);
1651                 listrds(b.Binrd,n.EV.E1,tmp,null);
1652                 uint j;
1653                 for (j = 0; (j = cast(uint) vec_index(j, tmp)) < go.defnod.length; ++j)  // for each RD of v in Binrd
1654                 {
1655                     if (go.defnod[j].DNelem == n)
1656                         continue;
1657                     if (b.Belem && refs(v,b.Belem,n))
1658                     {
1659                         vec_free(tmp);
1660                         goto L3;            // can't move it
1661                     }
1662                     break;                  // avoid redundant looping
1663                 }
1664                 vec_free(tmp);
1665 
1666                 // We have an LI assignment, n.
1667                 // Check to see if the rvalue is already in the preheader.
1668                 foreach (e; l.Llis)
1669                 {
1670                     if (el_match(n.EV.E2, e.EV.E2))
1671                     {
1672                         el_free(n.EV.E2);
1673                         n.EV.E2 = el_calloc();
1674                         el_copy(n.EV.E2, e.EV.E1);
1675                         if (debugc) printf("LI assignment rvalue was replaced\n");
1676                         doflow = true;
1677                         go.changes++;
1678                         break;
1679                     }
1680                 }
1681 
1682                 // move assignment elem to preheader
1683                 if (debugc) printf("Moved LI assignment ");
1684 
1685                 debug
1686                     if (debugc)
1687                     {
1688                         WReqn(n);
1689                         printf(";\n");
1690                     }
1691 
1692                 go.changes++;
1693                 doflow = true;                  // redo flow analysis
1694                 ne = el_calloc();
1695                 el_copy(ne,n);                  // create assignment elem
1696                 assert(l.Lpreheader);          // make sure there is one
1697                 appendelem(ne,&(l.Lpreheader.Belem)); // append ne to preheader
1698                 l.Llis.push(ne);
1699 
1700                 el_copy(n,ne.EV.E1);      // replace n with just a reference to v
1701                 goto Lret;
1702             } // if
1703             break;
1704 
1705         case OPcall:
1706         case OPucall:
1707             pdomexit |= 2;
1708             break;
1709 
1710         case OPpair:
1711         case OPrpair:                   // don't move these, as they do not do computation
1712             movelis(n.EV.E1,b,l,pdomexit);
1713             n = n.EV.E2;
1714             goto Lnextlis;
1715 
1716         default:
1717             break;
1718     }
1719 
1720 L3:
1721     // Do leaves of non-LI expressions, leaves of = elems that didn't
1722     // meet the invariant assignment removal criteria, and don't do leaves
1723     if (OTleaf(op))
1724         goto Lret;
1725     if (!isLI(n) || op == OPeq || op == OPcomma || OTrel(op) || op == OPnot ||
1726       // These are usually addressing modes, so moving them is a net loss
1727       (I32 && op == OPshl && n.EV.E2.Eoper == OPconst && el_tolong(n.EV.E2) <= 3UL)
1728      )
1729     {
1730         if (OTassign(op))
1731         {
1732             elem *n1 = n.EV.E1;
1733             elem *n11;
1734 
1735             if (OTbinary(op))
1736                 movelis(n.EV.E2,b,l,pdomexit);
1737 
1738             // Do lvalue only if it is an expression
1739             if (n1.Eoper == OPvar)
1740                 goto Lret;
1741             n11 = n1.EV.E1;
1742             if (OTbinary(n1.Eoper))
1743             {
1744                 movelis(n11,b,l,pdomexit);
1745                 n = n1.EV.E2;
1746             }
1747             // If *(x + c), just make x the LI, not the (x + c).
1748             // The +c comes free with the addressing mode.
1749             else if (n1.Eoper == OPind &&
1750                     isLI(n11) &&
1751                     n11.Eoper == OPadd &&
1752                     n11.EV.E2.Eoper == OPconst
1753                     )
1754             {
1755                 n = n11.EV.E1;
1756             }
1757             else
1758                 n = n11;
1759             movelis(n,b,l,pdomexit);
1760             if (b.Btry || !(n1.Eoper == OPvar && symbol_isintab(n1.EV.Vsym)))
1761             {
1762                 //printf("assign to global => domexit |= 2\n");
1763                 pdomexit |= 2;
1764             }
1765         }
1766         else if (OTunary(op))
1767         {
1768             elem *e1 = n.EV.E1;
1769 
1770             // If *(x + c), just make x the LI, not the (x + c).
1771             // The +c comes free with the addressing mode.
1772             if (op == OPind &&
1773                 isLI(e1) &&
1774                 e1.Eoper == OPadd &&
1775                 e1.EV.E2.Eoper == OPconst
1776                )
1777             {
1778                 n = e1.EV.E1;
1779             }
1780             else
1781                 n = e1;
1782         }
1783         else if (OTbinary(op))
1784         {
1785             movelis(n.EV.E1,b,l,pdomexit);
1786             n = n.EV.E2;
1787         }
1788         goto Lnextlis;
1789   }
1790 
1791   if (el_sideeffect(n))
1792         goto Lret;
1793 
1794     static if (0)
1795     {
1796         printf("*pdomexit = %u\n", pdomexit);
1797         if (pdomexit & 2)
1798         {
1799             // If any indirections, can't LI it
1800 
1801             // If this operand has already been indirected, we can let
1802             // it pass.
1803             Symbol *s;
1804 
1805             printf("looking at:\n");
1806             elem_print(n);
1807             s = el_basesym(n.EV.E1);
1808             if (s)
1809             {
1810                 foreach (el; l.Llis)
1811                 {
1812                     el = el.EV.E2;
1813                     elem_print(el);
1814                     if (el.Eoper == OPind && el_basesym(el.EV.E1) == s)
1815                     {
1816                         printf("  pass!\n");
1817                         goto Lpass;
1818                     }
1819                 }
1820             }
1821             printf("  skip!\n");
1822             goto Lret;
1823 
1824         Lpass:
1825         }
1826     }
1827 
1828     // Move the LI expression to the preheader
1829     if (debugc) printf("Moved LI expression ");
1830 
1831     debug
1832         if (debugc)
1833         {
1834             WReqn(n);
1835             printf(";\n");
1836         }
1837 
1838     // See if it's already been moved
1839     ty = n.Ety;
1840     foreach (el; l.Llis)
1841     {
1842         tym_t ty2;
1843 
1844         //printf("existing LI: "); WReqn(el); printf("\n");
1845         ty2 = el.EV.E2.Ety;
1846         if (tysize(ty) == tysize(ty2))
1847         {
1848             el.EV.E2.Ety = ty;
1849             if (el_match(n,el.EV.E2))
1850             {
1851                 el.EV.E2.Ety = ty2;
1852                 if (!OTleaf(n.Eoper))
1853                 {       el_free(n.EV.E1);
1854                         if (OTbinary(n.Eoper))
1855                                 el_free(n.EV.E2);
1856                 }
1857                 el_copy(n,el.EV.E1);      // make copy of temp
1858                 n.Ety = ty;
1859 
1860                 debug
1861                     if (debugc)
1862                     {   printf("Already moved: LI expression replaced with ");
1863                         WReqn(n);
1864                         printf("\nPreheader %d expression %p ",
1865                         l.Lpreheader.Bdfoidx,l.Lpreheader.Belem);
1866                         WReqn(l.Lpreheader.Belem);
1867                         printf("\n");
1868                     }
1869 
1870                 go.changes++;
1871                 doflow = true;                  // redo flow analysis
1872                 goto Lret;
1873             }
1874             el.EV.E2.Ety = ty2;
1875         }
1876     }
1877 
1878     if (!(pdomexit & 1))                       // if only sometimes executed
1879     {
1880         if (debugc) printf(" doesn't dominate exit\n");
1881         goto Lret;                              // don't move LI
1882     }
1883 
1884     if (tyaggregate(n.Ety))
1885         goto Lret;
1886 
1887     go.changes++;
1888     doflow = true;                              // redo flow analysis
1889 
1890     t = el_alloctmp(n.Ety);                     /* allocate temporary t */
1891 
1892     debug
1893     {
1894         if (debugc) printf("movelis() introduced new variable '%s' of type ",t.EV.Vsym.Sident.ptr);
1895         if (debugc) WRTYxx(t.Ety);
1896         if (debugc) printf("\n");
1897     }
1898 
1899     n2 = el_calloc();
1900     el_copy(n2,n);                              /* create copy n2 of n  */
1901     ne = el_bin(OPeq,t.Ety,t,n2);               /* create elem t=n2     */
1902     assert(l.Lpreheader);                       /* make sure there is one */
1903     appendelem(ne,&(l.Lpreheader.Belem));       /* append ne to preheader */
1904 
1905     debug
1906         if (debugc)
1907         {
1908             printf("Preheader %d expression %p\n\t",
1909             l.Lpreheader.Bdfoidx,l.Lpreheader.Belem);
1910             WReqn(l.Lpreheader.Belem);
1911             printf("\nLI expression replaced with "); WReqn(t);
1912             printf("\n");
1913         }
1914 
1915     el_copy(n,t);                                 /* replace this elem with t */
1916 
1917     // Remember LI expression in elem list
1918     l.Llis.push(ne);
1919 
1920 Lret:
1921 
1922 }
1923 
1924 /***************************
1925  * Append elem to existing elem using an OPcomma elem.
1926  * Input:
1927  *      n       elem to append
1928  *      *pn     elem to append to
1929  */
1930 
1931 private void appendelem(elem *n,elem **pn)
1932 {
1933     assert(n && pn);
1934     if (*pn)                                    /* if this elem exists  */
1935     {
1936         while ((*pn).Eoper == OPcomma)          /* while we see OPcomma elems */
1937         {
1938             (*pn).Ety = n.Ety;
1939             pn = &((*pn).EV.E2);                /* cruise down right side */
1940         }
1941         *pn = el_bin(OPcomma,n.Ety,*pn,n);
1942     }
1943     else
1944         *pn = n;                                /* else create a elem   */
1945 }
1946 
1947 /************** LOOP INDUCTION VARIABLES **********************/
1948 
1949 /****************************
1950  * Create a new famlist entry.
1951  */
1952 
1953 private void newfamlist(famlist* fl, tym_t ty)
1954 {
1955     eve c = void;
1956     memset(&c,0,c.sizeof);
1957 
1958     fl.FLty = ty;
1959     switch (tybasic(ty))
1960     {   case TYfloat:
1961             c.Vfloat = 1;
1962             break;
1963 
1964         case TYdouble:
1965         case TYdouble_alias:
1966             c.Vdouble = 1;
1967             break;
1968 
1969         case TYldouble:
1970             c.Vldouble = 1;
1971             break;
1972 
1973         case TYbool:
1974         case TYchar:
1975         case TYschar:
1976         case TYuchar:
1977             c.Vchar = 1;
1978             break;
1979 
1980         case TYshort:
1981         case TYushort:
1982         case TYchar16:
1983         case TYwchar_t:             // BUG: what about 4 byte wchar_t's?
1984             c.Vshort = 1;
1985             break;
1986 
1987         case TYsptr:
1988         case TYcptr:
1989         case TYfptr:
1990         case TYvptr:
1991         case TYnptr:
1992         case TYnullptr:
1993         case TYimmutPtr:
1994         case TYsharePtr:
1995         case TYrestrictPtr:
1996         case TYfgPtr:
1997             ty = TYint;
1998             if (I64)
1999                 ty = TYllong;
2000             goto case TYuint;
2001 
2002         case TYint:
2003         case TYuint:
2004             c.Vint = 1;
2005             break;
2006 
2007         case TYhptr:
2008             ty = TYlong;
2009             goto case TYlong;
2010 
2011         case TYlong:
2012         case TYulong:
2013         case TYdchar:
2014         default:
2015             c.Vlong = 1;
2016             break;
2017     }
2018     fl.c1 = el_const(ty,&c);               /* c1 = 1               */
2019     c.Vldouble = 0;
2020     if (typtr(ty))
2021     {
2022         ty = TYint;
2023         if (tybasic(ty) == TYhptr)
2024             ty = TYlong;
2025         if (I64)
2026             ty = TYllong;
2027     }
2028     fl.c2 = el_const(ty,&c);               /* c2 = 0               */
2029 }
2030 
2031 /***************************
2032  * Remove induction variables from loop l.
2033  * Loop invariant removal should have been done just previously.
2034  */
2035 
2036 private void loopiv(ref loop l)
2037 {
2038     if (debugc) printf("loopiv(%p)\n", &l);
2039     assert(l.Livlist.length == 0 && l.Lopeqlist.length == 0);
2040     elimspec(l);
2041     if (doflow)
2042     {
2043         flowrd();               /* compute reaching defs                */
2044         flowlv();               /* compute live variables               */
2045         flowae();               // compute available expressions
2046         doflow = false;
2047     }
2048     findbasivs(l);              /* find basic induction variables       */
2049     findopeqs(l);               // find op= variables
2050     findivfams(l);              /* find IV families                     */
2051     elimfrivivs(l);             /* eliminate less useful family IVs     */
2052     intronvars(l);              /* introduce new variables              */
2053     elimbasivs(l);              /* eliminate basic IVs                  */
2054     if (!addblk)                // adding a block changes the Binlv
2055         elimopeqs(l);           // eliminate op= variables
2056 
2057     foreach (ref iv; l.Livlist)
2058         iv.reset();
2059     l.Livlist.reset();          // reset for reuse
2060 
2061     foreach (ref iv; l.Lopeqlist)
2062         iv.reset();
2063     l.Lopeqlist.reset();        // reset for reuse
2064 
2065     /* Do copy propagation and dead assignment elimination        */
2066     /* upon return to optfunc()                                   */
2067 }
2068 
2069 /*************************************
2070  * Find basic IVs of loop l.
2071  * A basic IV x of loop l is a variable x which has
2072  * exactly one assignment within l of the form:
2073  * x += c or x -= c, where c is either a constant
2074  * or a LI.
2075  * Input:
2076  *      go.defnod[] loaded with all the definition elems of the loop
2077  */
2078 
2079 private void findbasivs(ref loop l)
2080 {
2081     vec_t poss,notposs;
2082     elem *n;
2083     bool ambdone;
2084 
2085     ambdone = false;
2086     poss = vec_calloc(globsym.length);
2087     notposs = vec_calloc(globsym.length);  /* vector of all variables      */
2088                                         /* (initially all unmarked)     */
2089 
2090     /* for each def in go.defnod[] that is within loop l     */
2091 
2092     foreach (const i; 0 .. go.defnod.length)
2093     {
2094         if (!vec_testbit(go.defnod[i].DNblock.Bdfoidx,l.Lloop))
2095             continue;               /* def is not in the loop       */
2096 
2097         n = go.defnod[i].DNelem;
2098         elem_debug(n);
2099         if (OTassign(n.Eoper) && n.EV.E1.Eoper == OPvar)
2100         {
2101             Symbol *s;                  /* if unambiguous def           */
2102 
2103             s = n.EV.E1.EV.Vsym;
2104             if (symbol_isintab(s))
2105             {
2106                 SYMIDX v;
2107 
2108                 v = n.EV.E1.EV.Vsym.Ssymnum;
2109                 if ((n.Eoper == OPaddass || n.Eoper == OPminass ||
2110                      n.Eoper == OPpostinc || n.Eoper == OPpostdec) &&
2111                         (n.EV.E2.Eoper == OPconst || /* if x += c or x -= c          */
2112                          n.EV.E2.Eoper == OPvar && isLI(n.EV.E2)))
2113                 {
2114                     if (vec_testbit(v,poss))
2115                         /* We've already seen this def elem,    */
2116                         /* therefore there is more than one     */
2117                         /* def of v within the loop, therefore  */
2118                         /* v is not a basic IV.                 */
2119                         vec_setbit(v,notposs);
2120                     else
2121                         vec_setbit(v,poss);
2122                 }
2123                 else                    /* else mark as not possible    */
2124                     vec_setbit(v,notposs);
2125             }
2126         }
2127         else                            /* else ambiguous def           */
2128         {
2129             /* mark any vars that could be affected by              */
2130             /* this def as not possible                             */
2131 
2132             if (!ambdone)           /* avoid redundant loops        */
2133             {
2134                 foreach (j; 0 .. globsym.length)
2135                 {
2136                     if (!(globsym[j].Sflags & SFLunambig))
2137                         vec_setbit(j,notposs);
2138                 }
2139                 ambdone = true;
2140             }
2141         }
2142     }
2143     static if (0)
2144     {
2145         printf("poss    "); vec_println(poss);
2146         printf("notposs "); vec_println(notposs);
2147     }
2148     vec_subass(poss,notposs);             /* poss = poss - notposs        */
2149 
2150     /* create list of IVs */
2151     uint i;
2152     for (i = 0; (i = cast(uint) vec_index(i, poss)) < globsym.length; ++i)  // for each basic IV
2153     {
2154         Symbol *s;
2155 
2156         /* Skip if we don't want it to be a basic IV (see funcprev())   */
2157         s = globsym[i];
2158         assert(symbol_isintab(s));
2159         if (s.Sflags & SFLnotbasiciv)
2160             continue;
2161 
2162         // Do not use aggregates as basic IVs. This is because the other loop
2163         // code doesn't check offsets into symbols, (assuming everything
2164         // is at offset 0). We could perhaps amend this by allowing basic IVs
2165         // if the struct consists of only one data member.
2166         if (tyaggregate(s.ty()))
2167             continue;
2168 
2169         // Do not use complex types as basic IVs, as the code gen isn't up to it
2170         if (tycomplex(s.ty()))
2171                 continue;
2172 
2173         auto biv = l.Livlist.push();
2174         biv.IVbasic = s;               // symbol of basic IV
2175         biv.IVincr = null;
2176 
2177         if (debugc) printf("Symbol '%s' (%d) is a basic IV, ", s.Sident.ptr, i);
2178 
2179         /* We have the sym idx of the basic IV. We need to find         */
2180         /* the parent of the increment elem for it.                     */
2181 
2182         /* First find the go.defnod[]      */
2183         foreach (j; 0 .. go.defnod.length)
2184         {
2185             /* If go.defnod is a def of i and it is in the loop        */
2186             if (go.defnod[j].DNelem.EV.E1 &&     /* OPasm are def nodes  */
2187                 go.defnod[j].DNelem.EV.E1.EV.Vsym == s &&
2188                 vec_testbit(go.defnod[j].DNblock.Bdfoidx,l.Lloop))
2189             {
2190                 biv.IVincr = el_parent(go.defnod[j].DNelem,&(go.defnod[j].DNblock.Belem));
2191                 assert(s == (*biv.IVincr).EV.E1.EV.Vsym);
2192 
2193                 debug if (debugc)
2194                 {
2195                     printf("Increment elem is: "); WReqn(*biv.IVincr);     printf("\n");
2196                 }
2197                 goto L1;
2198             }
2199         }
2200         assert(0);                      /* should have found it         */
2201         /* NOTREACHED */
2202 
2203     L1:
2204     }
2205 
2206     vec_free(poss);
2207     vec_free(notposs);
2208 }
2209 
2210 /*************************************
2211  * Find op= elems of loop l.
2212  * Analogous to findbasivs().
2213  * Used to eliminate useless loop code normally found in benchmark programs.
2214  * Input:
2215  *      go.defnod[] loaded with all the definition elems of the loop
2216  */
2217 
2218 private void findopeqs(ref loop l)
2219 {
2220     vec_t poss,notposs;
2221     elem *n;
2222     bool ambdone;
2223 
2224     ambdone = false;
2225     poss = vec_calloc(globsym.length);
2226     notposs = vec_calloc(globsym.length);  // vector of all variables
2227                                         // (initially all unmarked)
2228 
2229     // for each def in go.defnod[] that is within loop l
2230 
2231     foreach (i; 0 .. go.defnod.length)
2232     {
2233         if (!vec_testbit(go.defnod[i].DNblock.Bdfoidx,l.Lloop))
2234             continue;               // def is not in the loop
2235 
2236         n = go.defnod[i].DNelem;
2237         elem_debug(n);
2238         if (OTopeq(n.Eoper) && n.EV.E1.Eoper == OPvar)
2239         {
2240             Symbol *s;                  // if unambiguous def
2241 
2242             s = n.EV.E1.EV.Vsym;
2243             if (symbol_isintab(s))
2244             {
2245                 SYMIDX v;
2246 
2247                 v = n.EV.E1.EV.Vsym.Ssymnum;
2248                 {
2249                     if (vec_testbit(v,poss))
2250                         // We've already seen this def elem,
2251                         // therefore there is more than one
2252                         // def of v within the loop, therefore
2253                         // v is not a basic IV.
2254                         vec_setbit(v,notposs);
2255                     else
2256                         vec_setbit(v,poss);
2257                 }
2258             }
2259         }
2260         else                            // else ambiguous def
2261         {
2262             // mark any vars that could be affected by
2263             // this def as not possible
2264 
2265             if (!ambdone)           // avoid redundant loops
2266             {
2267                 foreach (j; 0 .. globsym.length)
2268                 {
2269                     if (!(globsym[j].Sflags & SFLunambig))
2270                         vec_setbit(j,notposs);
2271                 }
2272                 ambdone = true;
2273             }
2274         }
2275     }
2276 
2277     // Don't use symbols already in Livlist
2278     foreach (ref iv; l.Livlist)
2279     {
2280         vec_setbit(iv.IVbasic.Ssymnum,notposs);
2281     }
2282 
2283 
2284     static if (0)
2285     {
2286         printf("poss    "); vec_println(poss);
2287         printf("notposs "); vec_println(notposs);
2288     }
2289 
2290     vec_subass(poss,notposs);           // poss = poss - notposs
2291 
2292     // create list of IVs
2293     uint i;
2294     for (i = 0; (i = cast(uint) vec_index(i, poss)) < globsym.length; ++i)  // for each opeq IV
2295     {
2296         Symbol *s;
2297 
2298         s = globsym[i];
2299         assert(symbol_isintab(s));
2300 
2301         // Do not use aggregates as basic IVs. This is because the other loop
2302         // code doesn't check offsets into symbols, (assuming everything
2303         // is at offset 0). We could perhaps amend this by allowing basic IVs
2304         // if the struct consists of only one data member.
2305         if (tyaggregate(s.ty()))
2306             continue;
2307 
2308         auto biv = l.Lopeqlist.push();
2309         biv.IVbasic = s;               // symbol of basic IV
2310         biv.IVincr = null;
2311 
2312         if (debugc) printf("Symbol '%s' (%d) is an opeq IV, ",s.Sident.ptr,i);
2313 
2314         // We have the sym idx of the basic IV. We need to find
2315         // the parent of the increment elem for it.
2316 
2317         // First find the go.defnod[]
2318         foreach (j; 0 .. go.defnod.length)
2319         {
2320             // If go.defnod is a def of i and it is in the loop
2321             if (go.defnod[j].DNelem.EV.E1 &&     // OPasm are def nodes
2322                 go.defnod[j].DNelem.EV.E1.EV.Vsym == s &&
2323                 vec_testbit(go.defnod[j].DNblock.Bdfoidx,l.Lloop))
2324             {
2325                 biv.IVincr = el_parent(go.defnod[j].DNelem,&(go.defnod[j].DNblock.Belem));
2326                 assert(s == (*biv.IVincr).EV.E1.EV.Vsym);
2327 
2328                 debug if (debugc)
2329                 {
2330                     printf("Opeq elem is: "); WReqn(*biv.IVincr);  printf("\n");
2331                 }
2332                 goto Lcont;
2333             }
2334         }
2335         assert(0);                      // should have found it
2336         // NOTREACHED
2337 
2338     Lcont:
2339     }
2340 
2341     vec_free(poss);
2342     vec_free(notposs);
2343 }
2344 
2345 /*****************************
2346  * Find families for each basic IV.
2347  * An IV family is a list of elems of the form
2348  * c1*X+c2, where X is a basic induction variable.
2349  * Note that we do not do divides, because of roundoff error problems.
2350  */
2351 
2352 private void findivfams(ref loop l)
2353 {
2354     if (debugc) printf("findivfams(%p)\n", &l);
2355     foreach (ref biv; l.Livlist)
2356     {
2357         for (uint i = 0; (i = cast(uint) vec_index(i, l.Lloop)) < dfo.length; ++i)  // for each block in loop
2358             if (dfo[i].Belem)
2359                 ivfamelems(&biv,&(dfo[i].Belem));
2360         /* Fold all the constant expressions in c1 and c2.      */
2361         foreach (ref fl; biv.IVfamily)
2362         {
2363             fl.c1 = doptelem(fl.c1,GOALvalue | GOALagain);
2364             fl.c2 = doptelem(fl.c2,GOALvalue | GOALagain);
2365         }
2366     }
2367 }
2368 
2369 /*************************
2370  * Tree walking support routine for findivfams().
2371  *      biv =   basic induction variable pointer
2372  *      pn      pointer to elem
2373  */
2374 
2375 private void ivfamelems(Iv *biv,elem **pn)
2376 {
2377     tym_t ty,c2ty;
2378     elem *n1;
2379     elem *n2;
2380 
2381     assert(pn);
2382     elem* n = *pn;
2383     assert(biv && n);
2384     const op = n.Eoper;
2385     if (OTunary(op))
2386     {
2387        ivfamelems(biv,&n.EV.E1);
2388         n1 = n.EV.E1;
2389         n2 = null;
2390     }
2391     else if (OTbinary(op))
2392     {
2393         ivfamelems(biv,&n.EV.E1);
2394         ivfamelems(biv,&n.EV.E2); /* LTOR or RTOL order is unimportant */
2395         n1 = n.EV.E1;
2396         n2 = n.EV.E2;
2397     }
2398     else                                /* else leaf elem               */
2399         return;                         /* which can't be in the family */
2400 
2401     if (tycomplex(n.Ety))
2402         return;
2403 
2404     if (op == OPmul || op == OPadd || op == OPmin ||
2405         op == OPneg || op == OPshl)
2406     {   /* Note that we are wimping out and not considering             */
2407         /* LI variables as part of c1 and c2, but only constants.       */
2408 
2409         ty = n.Ety;
2410 
2411         /* Since trees are canonicalized, basic induction variables     */
2412         /* will only appear on the left.                                */
2413 
2414         /* Improvement:                                                 */
2415         /* We wish to pick up the cases (biv + li), (biv - li) and      */
2416         /* (li + biv). OPmul and LS with bivs are out, since if we      */
2417         /* try to eliminate the biv, and the loop test is a >, >=,      */
2418         /* <, <=, we have a problem since we don't know if the li       */
2419         /* is negative. (Would have to call swaprel() on it.)           */
2420 
2421         /* If we have (li + var), swap the leaves.                      */
2422         if (op == OPadd && isLI(n1) && n1.Eoper == OPvar && n2.Eoper == OPvar)
2423         {
2424             n.EV.E1 = n2;
2425             n2 = n.EV.E2 = n1;
2426             n1 = n.EV.E1;
2427         }
2428 
2429         // Get rid of case where we painted a far pointer to a long
2430         if (op == OPadd || op == OPmin)
2431         {
2432             int sz;
2433 
2434             sz = tysize(ty);
2435             if (sz == tysize(TYfptr) && !tyfv(ty) &&
2436                 (sz != tysize(n1.Ety) || sz != tysize(n2.Ety)))
2437                 return;
2438         }
2439 
2440         /* Look for function of basic IV (-biv or biv op const)         */
2441         if (n1.Eoper == OPvar && n1.EV.Vsym == biv.IVbasic)
2442         {
2443             if (op == OPneg)
2444             {
2445                 if (debugc) printf("found (-biv), elem %p\n",n);
2446                 auto fl = biv.IVfamily.push();
2447                 newfamlist(fl, ty);
2448                 fl.FLivty = n1.Ety;
2449                 fl.FLpelem = pn;
2450                 fl.c1 = el_una(op,ty,fl.c1); /* c1 = -1       */
2451             }
2452             else if (n2.Eoper == OPconst ||
2453                      isLI(n2) && (op == OPadd || op == OPmin))
2454             {
2455                 debug
2456                     if (debugc)
2457                     {   printf("found (biv op const), elem (");
2458                             WReqn(n);
2459                             printf(");\n");
2460                             printf("Types: n1="); WRTYxx(n1.Ety);
2461                             printf(" ty="); WRTYxx(ty);
2462                             printf(" n2="); WRTYxx(n2.Ety);
2463                             printf("\n");
2464                     }
2465 
2466                 auto fl = biv.IVfamily.push();
2467                 newfamlist(fl, ty);
2468                 fl.FLivty = n1.Ety;
2469                 fl.FLpelem = pn;
2470                 switch (op)
2471                 {
2472                     case OPadd:           /* c2 = right           */
2473                         c2ty = n2.Ety;
2474                         if (typtr(fl.c2.Ety))
2475                                 c2ty = fl.c2.Ety;
2476                         goto L1;
2477 
2478                     case OPmin:           /* c2 = -right          */
2479                         c2ty = fl.c2.Ety;
2480                         /* Check for subtracting two pointers */
2481                         if (typtr(c2ty) && typtr(n2.Ety))
2482                         {
2483                             if (tybasic(c2ty) == TYhptr)
2484                                 c2ty = TYlong;
2485                             else
2486                                 c2ty = I64 ? TYllong : TYint;
2487                         }
2488                   L1:
2489                         fl.c2 = el_bin(op,c2ty,fl.c2,el_copytree(n2));
2490                         break;
2491 
2492                     case OPmul:           /* c1 = right           */
2493                     case OPshl:           /* c1 = 1 << right      */
2494                         fl.c1 = el_bin(op,ty,fl.c1,el_copytree(n2));
2495                         break;
2496 
2497                     default:
2498                         assert(0);
2499                 }
2500             }
2501         }
2502 
2503         /* Look for function of existing IV                             */
2504 
2505         foreach (ref fl; biv.IVfamily)
2506         {
2507             if (*fl.FLpelem != n1)          /* not it               */
2508                 continue;
2509 
2510             /* Look for (fl op constant)     */
2511             if (op == OPneg)
2512             {
2513                 if (debugc) printf("found (-fl), elem %p\n",n);
2514                 /* c1 = -c1; c2 = -c2; */
2515                 fl.c1 = el_una(OPneg,ty,fl.c1);
2516                 fl.c2 = el_una(OPneg,ty,fl.c2);
2517                 fl.FLty = ty;
2518                 fl.FLpelem = pn;        /* replace with new IV  */
2519             }
2520             else if (n2.Eoper == OPconst ||
2521                      isLI(n2) && (op == OPadd || op == OPmin))
2522             {
2523                 debug
2524                     if (debugc)
2525                     {
2526                         printf("found (fl op const), elem (");
2527                         WReqn(n);
2528                         assert(*pn == n);
2529                         printf(");\n");
2530                         elem_print(n);
2531                     }
2532 
2533                 switch (op)
2534                 {
2535                     case OPmul:
2536                     case OPshl:
2537                         fl.c1 = el_bin(op,ty,fl.c1,el_copytree(n2));
2538                         break;
2539 
2540                     case OPadd:
2541                     case OPmin:
2542                         break;
2543 
2544                     default:
2545                         assert(0);
2546                 }
2547                 fl.c2 = el_bin(op,ty,fl.c2,el_copytree(n2));
2548                 fl.FLty = ty;
2549                 fl.FLpelem = pn;        /* replace with new IV  */
2550             } /* else if */
2551         } /* for */
2552     } /* if */
2553 }
2554 
2555 /*********************************
2556  * Eliminate frivolous family ivs, that is,
2557  * if we can't eliminate the BIV, then eliminate family ivs that
2558  * differ from it only by a constant.
2559  */
2560 
2561 private void elimfrivivs(ref loop l)
2562 {
2563     foreach (ref biv; l.Livlist)
2564     {
2565         int nrefs;
2566 
2567         if (debugc) printf("elimfrivivs()\n");
2568         /* Compute number of family ivs for biv */
2569         const nfams = biv.IVfamily.length;
2570         if (debugc) printf("nfams = %d\n", cast(int)nfams);
2571 
2572         /* Compute number of references to biv  */
2573         if (onlyref(biv.IVbasic,l,*biv.IVincr,&nrefs))
2574                 nrefs--;
2575         if (debugc) printf("nrefs = %d\n",nrefs);
2576         assert(nrefs + 1 >= nfams);
2577         if (nrefs > nfams ||            // if we won't eliminate the biv
2578             (!I16 && nrefs == nfams))
2579         {   /* Eliminate any family ivs that only differ by a constant  */
2580             /* from biv                                                 */
2581             foreach (ref fl; biv.IVfamily)
2582             {
2583                 elem *ec1 = fl.c1;
2584                 targ_llong c;
2585 
2586                 if (elemisone(ec1) ||
2587                     // Eliminate fl's that can be represented by
2588                     // an addressing mode
2589                     (!I16 && ec1.Eoper == OPconst && tyintegral(ec1.Ety) &&
2590                      ((c = el_tolong(ec1)) == 2 || c == 4 || c == 8)
2591                     )
2592                    )
2593                 {
2594                     fl.FLtemp = FLELIM;
2595 
2596                     debug
2597                         if (debugc)
2598                         {
2599                             printf("Eliminating frivolous IV ");
2600                             WReqn(*fl.FLpelem);
2601                             printf("\n");
2602                         }
2603                 }
2604             }
2605         }
2606     }
2607 }
2608 
2609 
2610 /******************************
2611  * Introduce new variables.
2612  */
2613 
2614 private void intronvars(ref loop l)
2615 {
2616     elem *T;
2617     elem *ne;
2618     elem *t2;
2619     elem *C2;
2620     elem *cmul;
2621     tym_t ty,tyr;
2622 
2623     if (debugc) printf("intronvars(%p)\n", &l);
2624     foreach (ref biv; l.Livlist)
2625     {
2626         elem *bivinc = *biv.IVincr;   /* ptr to increment elem */
2627 
2628         foreach (ref fl; biv.IVfamily)
2629         {                               /* for each IV in family of biv  */
2630             if (fl.FLtemp == FLELIM)   /* if already eliminated         */
2631                 continue;
2632 
2633             /* If induction variable can be written as a simple function */
2634             /* of a previous induction variable, skip it.                */
2635             if (funcprev(biv,fl))
2636                 continue;
2637 
2638             ty = fl.FLty;
2639             T = el_alloctmp(ty);        /* allocate temporary T          */
2640             fl.FLtemp = T.EV.Vsym;
2641 
2642             debug
2643             {
2644                 if (debugc) printf("intronvars() introduced new variable '%s' of type ",T.EV.Vsym.Sident.ptr);
2645                 if (debugc) WRTYxx(ty);
2646                 if (debugc) printf("\n");
2647             }
2648 
2649             /* append elem T=biv*C1+C2 to preheader */
2650             /* ne = biv*C1      */
2651             tyr = fl.FLivty;                   /* type of biv              */
2652             ne = el_var(biv.IVbasic);
2653             ne.Ety = tyr;
2654             if (!elemisone(fl.c1))             /* don't multiply ptrs by 1 */
2655                 ne = el_bin(OPmul,tyr,ne,el_copytree(fl.c1));
2656             if (tyfv(tyr) && tysize(ty) == SHORTSIZE)
2657                 ne = el_una(OP32_16,ty,ne);
2658             C2 = el_copytree(fl.c2);
2659             t2 = el_bin(OPadd,ty,ne,C2);        /* t2 = ne + C2         */
2660             ne = el_bin(OPeq,ty,el_copytree(T),t2);
2661             appendelem(ne, &(l.Lpreheader.Belem));
2662 
2663             /* prefix T+=C1*C to elem biv+=C                            */
2664             /* Must prefix in case the value of the expression (biv+=C) */
2665             /* is used by somebody up the tree.                         */
2666             cmul = el_bin(OPmul,fl.c1.Ety,el_copytree(fl.c1),
2667                                           el_copytree(bivinc.EV.E2));
2668             t2 = el_bin(bivinc.Eoper,ty,el_copytree(T),cmul);
2669             t2 = doptelem(t2,GOALvalue | GOALagain);
2670             *biv.IVincr = el_bin(OPcomma,bivinc.Ety,t2,bivinc);
2671             biv.IVincr = &((*biv.IVincr).EV.E2);
2672 
2673             debug
2674                 if (debugc)
2675                 {
2676                     printf("Replacing elem (");
2677                     WReqn(*fl.FLpelem);
2678                     printf(") with '%s'\n",T.EV.Vsym.Sident.ptr);
2679                     printf("The init elem is (");
2680                     WReqn(ne);
2681                     printf(");\nThe increment elem is (");
2682                     WReqn(t2);
2683                     printf(")\n");
2684                 }
2685 
2686             el_free(*fl.FLpelem);
2687             *fl.FLpelem = T;           /* replace elem n with ref to T  */
2688             doflow = true;              /* redo flow analysis           */
2689             go.changes++;
2690         } /* for */
2691     } /* for */
2692 }
2693 
2694 /*******************************
2695  * Determine if induction variable can be rewritten as a simple
2696  * function of a previously generated temporary.
2697  * This can frequently
2698  * generate less code than that of an all-new temporary (especially
2699  * if it is the same as a previous temporary!).
2700  * Input:
2701  *      biv             Basic induction variable
2702  *      fl              Item in biv's family list we are looking at
2703  * Returns:
2704  *      false           Caller should create a new induction variable.
2705  *      true            *FLpelem is replaced with function of a previous
2706  *                      induction variable. FLtemp is set to FLELIM to
2707  *                      indicate this.
2708  */
2709 
2710 private bool funcprev(ref Iv biv, ref famlist fl)
2711 {
2712     tym_t tymin;
2713     int sz;
2714     elem *e1;
2715     elem *e2;
2716     elem *flse1;
2717 
2718     debug if (debugc)
2719         printf("funcprev\n");
2720 
2721     foreach (ref fls; biv.IVfamily)
2722     {
2723         if (!fls.FLtemp)                // haven't generated a temporary yet
2724             continue;
2725         if (fls.FLtemp == FLELIM)      /* no iv we can use here        */
2726             continue;
2727 
2728         /* The multipliers must match   */
2729         if (!el_match(fls.c1,fl.c1))
2730             continue;
2731 
2732         /* If the c2's match also, we got it easy */
2733         if (el_match(fls.c2,fl.c2))
2734         {
2735             if (tysize(fl.FLty) > tysize(fls.FLtemp.ty()))
2736                 continue;              /* can't increase size of var   */
2737             flse1 = el_var(fls.FLtemp);
2738             flse1.Ety = fl.FLty;
2739             goto L2;
2740         }
2741 
2742         /* The difference is only in the addition. Therefore, replace
2743            *fl.FLpelem with:
2744                 case 1:         (fl.c2 + (fls.FLtemp - fls.c2))
2745                 case 2:         (fls.FLtemp + (fl.c2 - fls.c2))
2746          */
2747         e1 = fl.c2;
2748         /* Subtracting relocatables usually generates slow code for     */
2749         /* linkers that can't handle arithmetic on relocatables.        */
2750         if (typtr(fls.c2.Ety))
2751         {
2752             if (fls.c2.Eoper == OPrelconst &&
2753                 !(fl.c2.Eoper == OPrelconst &&
2754                   fl.c2.EV.Vsym == fls.c2.EV.Vsym)
2755                )
2756                 continue;
2757         }
2758         flse1 = el_var(fls.FLtemp);
2759         e2 = flse1;                             /* assume case 1        */
2760         tymin = e2.Ety;
2761         if (typtr(fls.c2.Ety))
2762         {
2763             if (!typtr(tymin))
2764             {
2765                 if (typtr(e1.Ety))
2766                 {
2767                     e1 = e2;
2768                     e2 = fl.c2;            /* case 2               */
2769                 }
2770                 else                        /* can't subtract fptr  */
2771                     goto L1;
2772             }
2773             if (tybasic(fls.c2.Ety) == TYhptr)
2774                 tymin = TYlong;
2775             else
2776                 tymin = I64 ? TYllong : TYint;         /* type of (ptr - ptr) */
2777         }
2778 
2779         /* If e1 and fls.c2 are fptrs, and are not from the same       */
2780         /* segment, we cannot subtract them.                            */
2781         if (tyfv(e1.Ety) && tyfv(fls.c2.Ety))
2782         {
2783             if (e1.Eoper != OPrelconst || fls.c2.Eoper != OPrelconst)
2784                 goto L1;                /* assume expressions have diff segs */
2785             if (e1.EV.Vsym.Sclass != fls.c2.EV.Vsym.Sclass)
2786             {
2787                L1:
2788                 el_free(flse1);
2789                 continue;
2790             }
2791         }
2792 
2793         /* Some more type checking...   */
2794         sz = tysize(fl.FLty);
2795         if (sz != tysize(e1.Ety) &&
2796             sz != tysize(tymin))
2797             goto L1;
2798 
2799         /* Do some type checking (can't add pointers and get a pointer!) */
2800         //if (typtr(fl.FLty) && typtr(e1.Ety) && typtr(tymin))
2801             //goto L1;
2802         /* Construct (e1 + (e2 - fls.c2))      */
2803         flse1 = el_bin(OPadd,fl.FLty,
2804                             e1,
2805                             el_bin(OPmin,tymin,
2806                                     e2,
2807                                     el_copytree(fls.c2)));
2808         if (sz < tysize(tymin) && sz == tysize(e1.Ety))
2809         {
2810             assert(I16);
2811             flse1.EV.E2 = el_una(OPoffset,fl.FLty,flse1.EV.E2);
2812         }
2813 
2814         flse1 = doptelem(flse1,GOALvalue | GOALagain);
2815         fl.c2 = null;
2816     L2:
2817         debug if (debugc)
2818         {
2819             printf("Replacing ");
2820             WReqn(*fl.FLpelem);
2821             printf(" with ");
2822             WReqn(flse1);
2823             printf("\n");
2824         }
2825 
2826         el_free(*fl.FLpelem);
2827         *fl.FLpelem = flse1;
2828 
2829         /* Fix the iv so when we do loops again, we won't create        */
2830         /* yet another iv, which is just what funcprev() is supposed    */
2831         /* to prevent.                                                  */
2832         fls.FLtemp.Sflags |= SFLnotbasiciv;
2833 
2834         fl.FLtemp = FLELIM;            /* mark iv as being gone        */
2835         go.changes++;
2836         doflow = true;
2837         return true;                    /* it was replaced              */
2838     }
2839     return false;                       /* need to create a new variable */
2840 }
2841 
2842 /***********************
2843  * Eliminate basic IVs.
2844  */
2845 
2846 private void elimbasivs(ref loop l)
2847 {
2848     if (debugc) printf("elimbasivs(%p)\n", &l);
2849     foreach (ref biv; l.Livlist)
2850     {
2851         /* Can't eliminate this basic IV if we have a goal for the      */
2852         /* increment elem.                                              */
2853         // Be careful about Nflags being in a union...
2854         elem* einc = *biv.IVincr;
2855         if (!(einc.Nflags & NFLnogoal))
2856             continue;
2857 
2858         Symbol* X = biv.IVbasic;
2859         assert(symbol_isintab(X));
2860         tym_t ty = X.ty();
2861         int refcount;
2862         elem** pref = onlyref(X,l,einc,&refcount);
2863 
2864         /* if only ref of X is of the form (X) or (X relop e) or (e relop X) */
2865         if (pref != null && refcount <= 1)
2866         {
2867             if (!biv.IVfamily.length)
2868                 continue;
2869 
2870             elem* ref_ = *pref;
2871 
2872             /* Replace (X) with (X != 0)                            */
2873             if (ref_.Eoper == OPvar)
2874                 ref_ = *pref = el_bin(OPne,TYint,ref_,el_long(ref_.Ety,0L));
2875 
2876             const fi = simfl(biv.IVfamily, ty); // find simplest elem in family
2877             if (fi == biv.IVfamily.length)
2878                 continue;
2879             famlist* fl = &biv.IVfamily[fi];
2880 
2881             // Don't do the replacement if we would replace a
2882             // signed comparison with an unsigned one
2883             tym_t flty = fl.FLty;
2884             if (tyuns(ref_.EV.E1.Ety) | tyuns(ref_.EV.E2.Ety))
2885                 flty = touns(flty);
2886 
2887             if (ref_.Eoper >= OPle && ref_.Eoper <= OPge &&
2888                 !(tyuns(ref_.EV.E1.Ety) | tyuns(ref_.EV.E2.Ety)) &&
2889                  tyuns(flty))
2890                     continue;
2891 
2892             /* if we have (e relop X), replace it with (X relop e)  */
2893             if (ref_.EV.E2.Eoper == OPvar && ref_.EV.E2.EV.Vsym == X)
2894             {
2895                 elem* tmp = ref_.EV.E2;
2896                 ref_.EV.E2 = ref_.EV.E1;
2897                 ref_.EV.E1 = tmp;
2898                 ref_.Eoper = cast(ubyte)swaprel(ref_.Eoper);
2899             }
2900 
2901             // If e*c1+c2 would result in a sign change or an overflow
2902             // then we can't do it
2903             if (fl.c1.Eoper == OPconst)
2904             {
2905                 targ_llong c1 = el_tolong(fl.c1);
2906                 const int sz = tysize(ty);
2907                 if (sz == SHORTSIZE &&
2908                     ((ref_.EV.E2.Eoper == OPconst &&
2909                     c1 * el_tolong(ref_.EV.E2) & ~0x7FFFL) ||
2910                      c1 & ~0x7FFFL)
2911                    )
2912                     continue;
2913 
2914                 if (sz == LONGSIZE &&
2915                     ((ref_.EV.E2.Eoper == OPconst &&
2916                     c1 * el_tolong(ref_.EV.E2) & ~0x7FFFFFFFL) ||
2917                      c1 & ~0x7FFFFFFFL)
2918                    )
2919                     continue;
2920                 if (sz == LLONGSIZE &&
2921                     ((ref_.EV.E2.Eoper == OPconst &&
2922                     c1 * el_tolong(ref_.EV.E2) & ~0x7FFFFFFFFFFFFFFFL) ||
2923                      c1 & ~0x7FFFFFFFFFFFFFFFL)
2924                    )
2925                     continue;
2926             }
2927 
2928             /* If the incr is a decrement, and the relational is < or <=,
2929              * and its unsigned, then don't do it because it could drop below 0.
2930              * https://issues.dlang.org/show_bug.cgi?id=16189
2931              */
2932             if ((einc.Eoper == OPminass || einc.EV.E2.Eoper == OPconst && el_tolong(einc.EV.E2) < 0) &&
2933                 (ref_.Eoper == OPlt || ref_.Eoper == OPle) &&
2934                 (tyuns(ref_.EV.E1.Ety) | tyuns(ref_.EV.E2.Ety)))
2935                 continue;
2936 
2937             /* If loop started out with a signed conditional that was
2938              * replaced with an unsigned one, don't do it if c2
2939              * is less than 0.
2940              */
2941             if (ref_.Nflags & NFLtouns && fl.c2.Eoper == OPconst)
2942             {
2943                 targ_llong c2 = el_tolong(fl.c2);
2944                 if (c2 < 0)
2945                     continue;
2946             }
2947 
2948             elem *refE2 = el_copytree(ref_.EV.E2);
2949             int refEoper = ref_.Eoper;
2950 
2951             /* if c1 < 0 and relop is < <= > >=
2952                then adjust relop as if both sides were multiplied
2953                by -1
2954              */
2955             if (!tyuns(ty) &&
2956                 (tyintegral(ty) && el_tolong(fl.c1) < 0 ||
2957                  tyfloating(ty) && el_toldoubled(fl.c1) < 0.0))
2958                 refEoper = swaprel(refEoper);
2959 
2960             /* Replace (X relop e) with (X relop (short)e)
2961                if T is 1 word but e is 2
2962              */
2963             if (tysize(flty) == SHORTSIZE &&
2964                 tysize(refE2.Ety) == LONGSIZE)
2965                 refE2 = el_una(OP32_16,flty,refE2);
2966 
2967             /* replace e with e*c1 + c2             */
2968             elem* C2 = el_copytree(fl.c2);
2969             elem* fofe = el_bin(OPadd,flty,
2970                             el_bin(OPmul,refE2.Ety,
2971                                     refE2,
2972                                     el_copytree(fl.c1)),
2973                             C2);
2974             fofe = doptelem(fofe,GOALvalue | GOALagain);    // fold any constants
2975 
2976             if (tyuns(flty) && refEoper == OPge &&
2977                 fofe.Eoper == OPconst && el_allbits(fofe, 0) &&
2978                 fl.c2.Eoper == OPconst && !el_allbits(fl.c2, 0))
2979             {
2980                 /* Don't do it if replacement will result in
2981                  * an unsigned T>=0 which will be an infinite loop.
2982                  */
2983                 el_free(fofe);
2984                 continue;
2985             }
2986 
2987             if (debugc) printf("Eliminating basic IV '%s'\n",X.Sident.ptr);
2988 
2989             debug if (debugc)
2990             {
2991                 printf("Comparison replaced: ");
2992                 WReqn(ref_);
2993                 printf(" with ");
2994             }
2995 
2996             el_free(ref_.EV.E2);
2997             ref_.EV.E2 = refE2;
2998             ref_.Eoper = cast(ubyte)refEoper;
2999 
3000             elimass(einc);          // dump the increment elem
3001 
3002             // replace X with T
3003             assert(ref_.EV.E1.EV.Voffset == 0);
3004             ref_.EV.E1.EV.Vsym = fl.FLtemp;
3005             ref_.EV.E1.Ety = flty;
3006             ref_.EV.E2 = fofe;
3007 
3008             /* If sizes of expression worked out wrong...
3009                Which can happen if we have (int)ptr==e
3010              */
3011             if (OTbinary(fofe.Eoper))         // if didn't optimize it away
3012             {
3013                 const tym_t fofety = fofe.Ety;
3014                 const int sz = tysize(fofety);
3015                 tym_t ty1 = fofe.EV.E1.Ety;
3016                 const tym_t ty2 = fofe.EV.E2.Ety;
3017                 /* Sizes of + expression must all be the same       */
3018                 if (sz != tysize(ty1) &&
3019                     sz != tysize(ty2)
3020                    )
3021                 {
3022                     if (tyuns(fofety))      // if unsigned comparison
3023                         ty1 = touns(ty1);   /* to unsigned type     */
3024                     fofe.Ety = ty1;
3025                     ref_.EV.E1.Ety = ty1;
3026                 }
3027             }
3028 
3029             /* Fix if leaves of compare are TYfptrs and the compare */
3030             /* operator is < <= > >=.                               */
3031             if (ref_.Eoper >= OPle && ref_.Eoper <= OPge && tyfv(ref_.EV.E1.Ety))
3032             {
3033                 assert(tyfv(ref_.EV.E2.Ety));
3034                 ref_.EV.E1 = el_una(OPoffset,TYuint,ref_.EV.E1);
3035                 ref_.EV.E2 = el_una(OPoffset,TYuint,fofe);
3036             }
3037 
3038             debug if (debugc)
3039             {
3040                 WReqn(ref_);
3041                 printf("\n");
3042             }
3043 
3044             go.changes++;
3045             doflow = true;                  /* redo flow analysis   */
3046 
3047             /* if X is live on entry to any successor S outside loop */
3048             /*      prepend elem X=(T-c2)/c1 to S.Belem     */
3049 
3050             for (uint i = 0; (i = cast(uint) vec_index(i, l.Lexit)) < dfo.length; ++i)  // for each exit block
3051             {
3052                 elem *ne;
3053                 block *b;
3054 
3055                 foreach (bl; ListRange(dfo[i].Bsucc))
3056                 {   /* for each successor   */
3057                     b = list_block(bl);
3058                     if (vec_testbit(b.Bdfoidx,l.Lloop))
3059                         continue;       /* inside loop  */
3060                     if (!vec_testbit(X.Ssymnum,b.Binlv))
3061                         continue;       /* not live     */
3062 
3063                     C2 = el_copytree(fl.c2);
3064                     ne = el_bin(OPmin,ty,
3065                             el_var(fl.FLtemp),
3066                             C2);
3067                     if (tybasic(ne.EV.E1.Ety) == TYfptr &&
3068                         tybasic(ne.EV.E2.Ety) == TYfptr)
3069                     {
3070                         ne.Ety = I64 ? TYllong : TYint;
3071                         if (tylong(ty) && _tysize[TYint] == 2)
3072                             ne = el_una(OPs16_32,ty,ne);
3073                     }
3074 
3075                     ne = el_bin(OPeq,X.ty(),
3076                             el_var(X),
3077                             el_bin(OPdiv,ne.Ety,
3078                                 ne,
3079                                 el_copytree(fl.c1)));
3080 
3081                     debug if (debugc)
3082                     {
3083                         printf("Adding (");
3084                         WReqn(ne);
3085                         printf(") to exit block B%d\n",b.Bdfoidx);
3086                         //elem_print(ne);
3087                     }
3088 
3089                     /* We have to add a new block if there is */
3090                     /* more than one predecessor to b.      */
3091                     if (list_next(b.Bpred))
3092                     {
3093                         block *bn = block_calloc();
3094                         bn.Btry = b.Btry;
3095                         bn.BC = BCgoto;
3096                         bn.Bnext = dfo[i].Bnext;
3097                         dfo[i].Bnext = bn;
3098                         list_append(&(bn.Bsucc),b);
3099                         list_append(&(bn.Bpred),dfo[i]);
3100                         bl.ptr = cast(void *)bn;
3101                         foreach (bl2; ListRange(b.Bpred))
3102                             if (list_block(bl2) == dfo[i])
3103                             {
3104                                 bl2.ptr = cast(void *)bn;
3105                                 goto L2;
3106                             }
3107                         assert(0);
3108                     L2:
3109                         b = bn;
3110                         addblk = true;
3111                     }
3112 
3113                     if (b.Belem)
3114                         b.Belem =
3115                             el_bin(OPcomma,b.Belem.Ety,
3116                                 ne,b.Belem);
3117                     else
3118                         b.Belem = ne;
3119                     go.changes++;
3120                     doflow = true;  /* redo flow analysis   */
3121                 } /* for each successor */
3122             } /* foreach exit block */
3123             if (addblk)
3124                 return;
3125         }
3126         else if (refcount == 0)                 /* if no uses of IV in loop  */
3127         {
3128             /* Eliminate the basic IV if it is not live on any successor */
3129             for (uint i = 0; (i = cast(uint) vec_index(i, l.Lexit)) < dfo.length; ++i)  // for each exit block
3130             {
3131                 foreach (bl; ListRange(dfo[i].Bsucc))
3132                 {   /* for each successor   */
3133                     block *b = list_block(bl);
3134                     if (vec_testbit(b.Bdfoidx,l.Lloop))
3135                         continue;       /* inside loop  */
3136                     if (vec_testbit(X.Ssymnum,b.Binlv))
3137                         goto L1;        /* live         */
3138                 }
3139             }
3140 
3141             if (debugc) printf("No uses, eliminating basic IV '%s' (%p)\n",X.Sident.ptr,X);
3142 
3143             /* Remove the (s op= e2) by replacing it with (1 , e2)
3144              * and let later passes remove the (1 ,) nodes.
3145              * Don't remove those nodes here because other biv's may refer
3146              * to them.
3147              */
3148             {
3149                 elem* ei = *biv.IVincr;
3150                 ei.Eoper = OPcomma;
3151                 ei.EV.E1.Eoper = OPconst;
3152                 ei.EV.E1.Ety = TYint;
3153             }
3154 
3155             go.changes++;
3156             doflow = true;                  /* redo flow analysis   */
3157           L1:
3158         }
3159     } /* for */
3160 }
3161 
3162 
3163 /***********************
3164  * Eliminate opeq IVs that are not used outside the loop.
3165  */
3166 
3167 private void elimopeqs(ref loop l)
3168 {
3169     elem **pref;
3170     Symbol *X;
3171     int refcount;
3172 
3173     if (debugc) printf("elimopeqs(%p)\n", &l);
3174     //foreach (ref biv; l.Lopeqlist) elem_print(*(biv.IVincr));
3175 
3176     foreach (ref biv; l.Lopeqlist)
3177     {
3178         // Can't eliminate this basic IV if we have a goal for the
3179         // increment elem.
3180         // Be careful about Nflags being in a union...
3181         if (!((*biv.IVincr).Nflags & NFLnogoal))
3182             continue;
3183 
3184         X = biv.IVbasic;
3185         assert(symbol_isintab(X));
3186         pref = onlyref(X,l,*biv.IVincr,&refcount);
3187 
3188         // if only ref of X is of the form (X) or (X relop e) or (e relop X)
3189         if (pref != null && refcount <= 1)
3190         { }
3191         else if (refcount == 0)                 // if no uses of IV in loop
3192         {   // Eliminate the basic IV if it is not live on any successor
3193             uint i;
3194             for (i = 0; (i = cast(uint) vec_index(i, l.Lexit)) < dfo.length; ++i)  // for each exit block
3195             {
3196                 foreach (bl; ListRange(dfo[i].Bsucc))
3197                 {   // for each successor
3198                     block *b = list_block(bl);
3199                     if (vec_testbit(b.Bdfoidx,l.Lloop))
3200                         continue;       // inside loop
3201                     if (vec_testbit(X.Ssymnum,b.Binlv))
3202                         goto L1;        // live
3203                 }
3204             }
3205 
3206             if (debugc) printf("No uses, eliminating opeq IV '%s' (%p)\n",X.Sident.ptr,X);
3207 
3208             /* Remove the (s op= e2) by replacing it with (1 , e2)
3209              * and let later passes remove the (1 ,) nodes.
3210              * Don't remove those nodes here because other biv's may refer
3211              * to them, for nodes like (sum += i++)
3212              */
3213             {
3214                 elem* einc = *(biv.IVincr);
3215                 einc.Eoper = OPcomma;
3216                 einc.EV.E1.Eoper = OPconst;
3217                 einc.EV.E1.Ety = TYint;
3218             }
3219 
3220             go.changes++;
3221             doflow = true;                      // redo flow analysis
3222         L1:
3223         }
3224     }
3225 }
3226 
3227 /**************************
3228  * Find simplest elem in family.
3229  * Params:
3230  *      fams = array of famlist's
3231  *      tym  = type of basic IV
3232  * Returns: index into fams[] of simplest; fams.length if none found.
3233  */
3234 
3235 extern (D)
3236 private size_t simfl(famlist[] fams, tym_t tym)
3237 {
3238     size_t sofar = fams.length;
3239 
3240     foreach (i, ref fl; fams)
3241     {
3242         if (fl.FLtemp == FLELIM)       /* no variable, so skip it      */
3243             continue;
3244         /* If size of replacement is less than size of biv, we could    */
3245         /* be in trouble due to loss of precision.                      */
3246         if (size(fl.FLtemp.ty()) < size(tym))
3247             continue;
3248 
3249         // pick simplest
3250         sofar = sofar == fams.length ? i
3251                                      : (flcmp(fams[sofar], fams[i]) ? sofar : i);
3252     }
3253     return sofar;
3254 }
3255 
3256 /**************************
3257  * Return simpler of two family elems.
3258  * There is room for improvement, namely if
3259  *      f1.c1 = 2, f2.c1 = 27
3260  * then pick f1 because it is a shift.
3261  * Returns:
3262  *      true for f1 is simpler, false  for f2 is simpler
3263  */
3264 
3265 private bool flcmp(ref famlist f1, ref famlist f2)
3266 {
3267     auto t1 = &(f1.c1.EV);
3268     auto t2 = &(f2.c1.EV);
3269     auto ty = (*f1.FLpelem).Ety;           // type of elem
3270 
3271     static if (0)
3272     {
3273         printf("f1: c1 = %d, c2 = %d\n",t1.Vshort,f1.c2.EV.Vshort);
3274         printf("f2: c1 = %d, c2 = %d\n",t2.Vshort,f2.c2.EV.Vshort);
3275         WRTYxx((*f1.FLpelem).Ety);
3276         WRTYxx((*f2.FLpelem).Ety);
3277     }
3278 
3279     /* Wimp out and just pick f1 if the types don't match               */
3280     if (tysize(ty) == tysize((*f2.FLpelem).Ety))
3281     {
3282         switch (tybasic(ty))
3283         {   case TYbool:
3284             case TYchar:
3285             case TYschar:
3286             case TYuchar:
3287                 if (t2.Vuchar == 1 ||
3288                     t1.Vuchar != 1 && f2.c2.EV.Vuchar == 0)
3289                         goto Lf2;
3290                 break;
3291 
3292             case TYshort:
3293             case TYushort:
3294             case TYchar16:
3295             case TYwchar_t:     // BUG: what about 4 byte wchar_t's?
3296             case_short:
3297                 if (t2.Vshort == 1 ||
3298                     t1.Vshort != 1 && f2.c2.EV.Vshort == 0)
3299                         goto Lf2;
3300                 break;
3301 
3302             case TYsptr:
3303             case TYcptr:
3304             case TYnptr:        // BUG: 64 bit pointers?
3305             case TYimmutPtr:
3306             case TYsharePtr:
3307             case TYrestrictPtr:
3308             case TYfgPtr:
3309             case TYnullptr:
3310             case TYint:
3311             case TYuint:
3312                 if (_tysize[TYint] == SHORTSIZE)
3313                     goto case_short;
3314                 else
3315                     goto case_long;
3316 
3317             case TYlong:
3318             case TYulong:
3319             case TYdchar:
3320             case TYfptr:
3321             case TYvptr:
3322             case TYhptr:
3323             case_long:
3324                 if (t2.Vlong == 1 ||
3325                     t1.Vlong != 1 && f2.c2.EV.Vlong == 0)
3326                         goto Lf2;
3327                 break;
3328 
3329             case TYfloat:
3330                 if (t2.Vfloat == 1 ||
3331                     t1.Vfloat != 1 && f2.c2.EV.Vfloat == 0)
3332                         goto Lf2;
3333                 break;
3334 
3335             case TYdouble:
3336             case TYdouble_alias:
3337                 if (t2.Vdouble == 1.0 ||
3338                     t1.Vdouble != 1.0 && f2.c2.EV.Vdouble == 0)
3339                         goto Lf2;
3340                 break;
3341 
3342             case TYldouble:
3343                 if (t2.Vldouble == 1.0 ||
3344                     t1.Vldouble != 1.0 && f2.c2.EV.Vldouble == 0)
3345                         goto Lf2;
3346                 break;
3347 
3348             case TYllong:
3349             case TYullong:
3350                 if (t2.Vllong == 1 ||
3351                     t1.Vllong != 1 && f2.c2.EV.Vllong == 0)
3352                         goto Lf2;
3353                 break;
3354 
3355             default:
3356                 assert(0);
3357         }
3358     }
3359     //printf("picking f1\n");
3360     return true;
3361 
3362 Lf2:
3363     //printf("picking f2\n");
3364     return false;
3365 }
3366 
3367 /************************************
3368  * Input:
3369  *      x       basic IV symbol
3370  *      incn    increment elem for basic IV X.
3371  * Output:
3372  *      *prefcount      # of references to X other than the increment elem
3373  * Returns:
3374  *      If ref of X in loop l is of the form (X relop e) or (e relop X)
3375  *              Return the relop elem
3376  *      Else
3377  *              Return null
3378  */
3379 
3380 private __gshared
3381 {
3382     int count;
3383     elem **nd;
3384     elem *sincn;
3385     Symbol *X;
3386 }
3387 
3388 private elem ** onlyref(Symbol *x, ref loop l,elem *incn,int *prefcount)
3389 {
3390     uint i;
3391 
3392     //printf("onlyref('%s')\n", x.Sident.ptr);
3393     X = x;                                /* save some parameter passing  */
3394     assert(symbol_isintab(x));
3395     sincn = incn;
3396 
3397     debug
3398       if (!(X.Ssymnum < globsym.length && incn))
3399           printf("X = %d, globsym.length = %d, l = %p, incn = %p\n",cast(int) X.Ssymnum,cast(int) globsym.length,&l,incn);
3400 
3401     assert(X.Ssymnum < globsym.length && incn);
3402     count = 0;
3403     nd = null;
3404     for (i = 0; (i = cast(uint) vec_index(i, l.Lloop)) < dfo.length; ++i)  // for each block in loop
3405     {
3406         block *b;
3407 
3408         b = dfo[i];
3409         if (b.Belem)
3410         {
3411             countrefs(&b.Belem,b.BC == BCiftrue);
3412         }
3413     }
3414 
3415     static if (0)
3416     {
3417         printf("count = %d, nd = (");
3418         if (nd) WReqn(*nd);
3419         printf(")\n");
3420     }
3421 
3422     *prefcount = count;
3423     return nd;
3424 }
3425 
3426 
3427 /******************************
3428  * Count elems of the form (X relop e) or (e relop X).
3429  * Do not count the node if it is the increment node (sincn).
3430  * Input:
3431  *      flag:   true if block wants to test the elem
3432  */
3433 
3434 private void countrefs(elem **pn,bool flag)
3435 {
3436     elem *n = *pn;
3437 
3438     assert(n);
3439     if (n == sincn)                       /* if it is the increment elem  */
3440     {
3441         if (OTbinary(n.Eoper))
3442             countrefs(&n.EV.E2, false);
3443         return;                         // don't count lvalue
3444     }
3445     if (OTunary(n.Eoper))
3446         countrefs(&n.EV.E1,false);
3447     else if (OTbinary(n.Eoper))
3448     {
3449         if (OTrel(n.Eoper))
3450         {
3451             elem *e1 = n.EV.E1;
3452 
3453             assert(e1.Eoper != OPcomma);
3454             if (e1 == sincn &&
3455                 (e1.Eoper == OPeq || OTopeq(e1.Eoper)))
3456                 goto L1;
3457 
3458             /* Check both subtrees to see if n is the comparison node,
3459              * that is, if X is a leaf of the comparison.
3460              */
3461             if (e1.Eoper == OPvar && e1.EV.Vsym == X && !countrefs2(n.EV.E2) ||
3462                 n.EV.E2.Eoper == OPvar && n.EV.E2.EV.Vsym == X && !countrefs2(e1))
3463                 nd = pn;                /* found the relop node */
3464         }
3465     L1:
3466         countrefs(&n.EV.E1,false);
3467         countrefs(&n.EV.E2,(flag && n.Eoper == OPcomma));
3468     }
3469     else if ((n.Eoper == OPvar || n.Eoper == OPrelconst) && n.EV.Vsym == X)
3470     {
3471         if (flag)
3472             nd = pn;                    /* comparing it with 0          */
3473         count++;                        /* found another reference      */
3474     }
3475 }
3476 
3477 /*******************************
3478  * Count number of times symbol X appears in elem tree e.
3479  */
3480 
3481 private int countrefs2(elem *e)
3482 {
3483     elem_debug(e);
3484     while (OTunary(e.Eoper))
3485         e = e.EV.E1;
3486     if (OTbinary(e.Eoper))
3487         return countrefs2(e.EV.E1) + countrefs2(e.EV.E2);
3488     return ((e.Eoper == OPvar || e.Eoper == OPrelconst) &&
3489             e.EV.Vsym == X);
3490 }
3491 
3492 /****************************
3493  * Eliminate some special cases.
3494  */
3495 
3496 private void elimspec(ref loop l)
3497 {
3498     uint i;
3499 
3500     for (i = 0; (i = cast(uint) vec_index(i, l.Lloop)) < dfo.length; ++i)  // for each block in loop
3501     {
3502         block *b;
3503 
3504         b = dfo[i];
3505         if (b.Belem)
3506             elimspecwalk(&b.Belem);
3507     }
3508 }
3509 
3510 
3511 /******************************
3512  */
3513 
3514 private void elimspecwalk(elem **pn)
3515 {
3516     elem *n;
3517 
3518     n = *pn;
3519     assert(n);
3520     if (OTunary(n.Eoper))
3521         elimspecwalk(&n.EV.E1);
3522     else if (OTbinary(n.Eoper))
3523     {
3524         elimspecwalk(&n.EV.E1);
3525         elimspecwalk(&n.EV.E2);
3526         if (OTrel(n.Eoper))
3527         {
3528             elem *e1 = n.EV.E1;
3529 
3530             /* Replace ((e1,e2) rel e3) with (e1,(e2 rel e3).
3531              * This will reduce the number of cases for elimbasivs().
3532              * Don't do equivalent with (e1 rel (e2,e3)) because
3533              * of potential side effects in e1.
3534              */
3535             if (e1.Eoper == OPcomma)
3536             {
3537                 elem *e;
3538 
3539                 debug if (debugc)
3540                 {   printf("3rewriting ("); WReqn(n); printf(")\n"); }
3541 
3542                 e = n.EV.E2;
3543                 n.EV.E2 = e1;
3544                 n.EV.E1 = n.EV.E2.EV.E1;
3545                 n.EV.E2.EV.E1 = n.EV.E2.EV.E2;
3546                 n.EV.E2.EV.E2 = e;
3547                 n.EV.E2.Eoper = n.Eoper;
3548                 n.EV.E2.Ety = n.Ety;
3549                 n.Eoper = OPcomma;
3550 
3551                 go.changes++;
3552                 doflow = true;
3553 
3554                 elimspecwalk(&n.EV.E1);
3555                 elimspecwalk(&n.EV.E2);
3556             }
3557 
3558             /* Rewrite ((X op= e2) rel e3) into ((X op= e2),(X rel e3))
3559              * Rewrite ((X ++  e2) rel e3) into ((X +=  e2),(X-e2 rel e3))
3560              * so that the op= will not have a goal, so elimbasivs()
3561              * will work on it.
3562              */
3563             if ((OTopeq(e1.Eoper)
3564                  || OTpost(e1.Eoper)
3565                 ) &&
3566                 !el_sideeffect(e1.EV.E1))
3567             {
3568                 elem *e;
3569                 OPER op;
3570 
3571                 debug if (debugc)
3572                 {   printf("4rewriting ("); WReqn(n); printf(")\n"); }
3573 
3574                 e = el_calloc();
3575                 el_copy(e,n);
3576                 e.EV.E1 = el_copytree(e1.EV.E1);
3577                 e.EV.E1.Ety = n.EV.E1.Ety;
3578                 n.EV.E2 = e;
3579                 switch (e1.Eoper)
3580                 {
3581                     case OPpostinc:
3582                         e1.Eoper = OPaddass;
3583                         op = OPmin;
3584                         goto L3;
3585 
3586                     case OPpostdec:
3587                         e1.Eoper = OPminass;
3588                         op = OPadd;
3589                     L3: e.EV.E1 = el_bin(op,e.EV.E1.Ety,e.EV.E1,el_copytree(e1.EV.E2));
3590                         break;
3591 
3592                     default:
3593                         break;
3594                 }
3595                 /* increment node is now guaranteed to have no goal */
3596                 e1.Nflags |= NFLnogoal;
3597                 n.Eoper = OPcomma;
3598                 //go.changes++;
3599                 doflow = true;
3600 
3601                 elimspecwalk(&n.EV.E1);
3602                 elimspecwalk(&n.EV.E2);
3603             }
3604         }
3605   }
3606 }
3607 
3608 /********
3609  * Walk e in execution order.
3610  * When eincrement is found, remove it.
3611  * Continue, replacing instances of `v` with `v+increment`
3612  * When second eincrement is found, stop.
3613  * Params:
3614  *      e = expression to walk
3615  *      defnum = index of eincrement
3616  *      v = increment variable
3617  *      increment = amount to increment v
3618  *      unrolls = number of times loop has been unrolled
3619  */
3620 
3621 private void unrollWalker(elem* e, uint defnum, Symbol* v, targ_llong increment, int unrolls) nothrow
3622 {
3623     int state = 0;
3624 
3625     /***********************************
3626      * Walk e in execution order, fixing it according to state.
3627      * state == 0..unrolls-1: when eincrement is found, remove it, advance to next state
3628      * state == 1..unrolls-1: replacing instances of v with v+(state*increment),
3629      * state == unrolls-1: leave eincrement alone, advance to next state
3630      * state == unrolls: done
3631      */
3632 
3633     void walker(elem *e)
3634     {
3635         assert(e);
3636         const op = e.Eoper;
3637         if (ERTOL(e))
3638         {
3639             if (e.Edef != defnum)
3640             {
3641                 walker(e.EV.E2);
3642                 walker(e.EV.E1);
3643             }
3644         }
3645         else if (OTbinary(op))
3646         {
3647             if (e.Edef != defnum)
3648             {
3649                 walker(e.EV.E1);
3650                 walker(e.EV.E2);
3651             }
3652         }
3653         else if (OTunary(op))
3654         {
3655             assert(e.Edef != defnum);
3656             walker(e.EV.E1);
3657         }
3658         else if (op == OPvar &&
3659                  state &&
3660                  e.EV.Vsym == v)
3661         {
3662             // overwrite e with (v+increment)
3663             elem *e1 = el_calloc();
3664             el_copy(e1,e);
3665             e.Eoper = OPadd;
3666             e.EV.E1 = e1;
3667             e.EV.E2 = el_long(e.Ety, increment * state);
3668         }
3669         if (OTdef(op) && e.Edef == defnum)
3670         {
3671             // found the increment elem; neuter all but the last one
3672             if (state + 1 < unrolls)
3673             {
3674                 el_free(e.EV.E1);
3675                 el_free(e.EV.E2);
3676                 e.Eoper = OPconst;
3677                 e.EV.Vllong = 0;
3678             }
3679             ++state;
3680         }
3681     }
3682 
3683     walker(e);
3684     assert(state == unrolls);
3685 }
3686 
3687 
3688 /*********************************
3689  * Unroll loop if possible.
3690  * Params:
3691  *      l = loop to unroll
3692  * Returns:
3693  *      true if loop was unrolled
3694  */
3695 bool loopunroll(ref loop l)
3696 {
3697     const bool log = false;
3698     if (log) printf("loopunroll(%p)\n", &l);
3699 
3700     /* Do not repeatedly unroll the same loop,
3701      * or waste time attempting to
3702      */
3703     if (l.Lhead.Bflags & BFLnounroll)
3704         return false;
3705     l.Lhead.Bflags |= BFLnounroll;
3706     if (log) WRfunc();
3707 
3708     if (l.Lhead.Btry || l.Ltail.Btry)
3709         return false;
3710 
3711     /* For simplification, only unroll loops that consist only
3712      * of a head and tail, and the tail is the exit block.
3713      */
3714     int numblocks = 0;
3715     for (int i = 0; (i = cast(uint) vec_index(i, l.Lloop)) < dfo.length; ++i)  // for each block in loop
3716         ++numblocks;
3717     if (numblocks != 2)
3718     {
3719         if (log) printf("\tnot 2 blocks, but %d\n", numblocks);
3720         return false;
3721     }
3722     assert(l.Lhead != l.Ltail);
3723 
3724     /* tail must be the sole exit block
3725      */
3726     if (vec_testbit(l.Lhead.Bdfoidx, l.Lexit) ||
3727         !vec_testbit(l.Ltail.Bdfoidx, l.Lexit))
3728     {
3729         if (log) printf("\ttail not sole exit block\n");
3730         return false;
3731     }
3732 
3733     elem *ehead = l.Lhead.Belem;
3734     elem *etail = l.Ltail.Belem;
3735 
3736     if (log)
3737     {
3738         printf("Unroll candidate:\n");
3739         printf("  head B%d:\t", l.Lhead.Bdfoidx); WReqn(l.Lhead.Belem); printf("\n");
3740         printf("  tail B%d:\t", l.Ltail.Bdfoidx); WReqn(l.Ltail.Belem); printf("\n");
3741     }
3742 
3743     /* Tail must be of the form: (v < c) or (v <= c) where v is an unsigned integer
3744      */
3745     if ((etail.Eoper != OPlt && etail.Eoper != OPle) ||
3746         etail.EV.E1.Eoper != OPvar ||
3747         etail.EV.E2.Eoper != OPconst)
3748     {
3749         if (log) printf("\tnot (v < c)\n");
3750         return false;
3751     }
3752 
3753     elem *e1 = etail.EV.E1;
3754     elem *e2 = etail.EV.E2;
3755 
3756     if (!tyintegral(e1.Ety) ||
3757         tysize(e1.Ety) > targ_llong.sizeof ||
3758         !(tyuns(e1.Ety) || tyuns(e2.Ety)))
3759     {
3760         if (log) printf("\tnot (integral unsigned)\n");
3761         return false;
3762     }
3763 
3764     int cost = el_length(ehead);
3765     //printf("test4 cost: %d\n", cost);
3766 
3767     if (cost > 100)
3768     {
3769         if (log) printf("\tcost %d\n", cost);
3770         return false;
3771     }
3772     if (log) printf("cost %d\n", cost);
3773 
3774     Symbol* v = e1.EV.Vsym;
3775 
3776     // RD info is only reliable for registers and autos
3777     if (!(sytab[v.Sclass] & SCRD) || !(v.Sflags & SFLunambig))
3778     {
3779         if (log) printf("\tnot SCRD\n");
3780         return false;
3781     }
3782 
3783     /* Find the initial, increment elem, and final value of s
3784      */
3785     elem *einitial;
3786     elem *eincrement;
3787     if (!findloopparameters(etail, einitial, eincrement))
3788     {
3789         if (log) printf("\tnot findloopparameters()\n");
3790         return false;
3791     }
3792 
3793     targ_llong initial = el_tolong(einitial.EV.E2);
3794     targ_llong increment = el_tolong(eincrement.EV.E2);
3795     if (eincrement.Eoper == OPpostdec || eincrement.Eoper == OPminass)
3796         increment = -increment;
3797     targ_llong final_ = el_tolong(e2);
3798 
3799     if (log) printf("initial = %lld, increment = %lld, final = %lld\n",cast(long)initial,cast(long)increment,cast(long)final_);
3800 
3801     if (etail.Eoper == OPle)
3802         ++final_;
3803 
3804     if (initial < 0 ||
3805         final_ < initial ||
3806         increment <= 0 ||
3807         (final_ - initial) % increment)
3808     {
3809         if (log) printf("\tnot (evenly divisible)\n");
3810         return false;
3811     }
3812 
3813     /* If loop would only execute once anyway, just remove the test at the end
3814      */
3815     if (initial + increment == final_)
3816     {
3817         if (log) printf("\tjust once\n");
3818         etail.Eoper = OPcomma;
3819         e2.EV.Vllong = 0;
3820         e2.Ety = etail.Ety;
3821         return false;
3822     }
3823 
3824     /* number of times the loop is unrolled
3825      */
3826     targ_ullong numIterations = (final_ - initial) / increment;
3827     const int unrolls = (numIterations < 1000 / cost)
3828         ? cast(int)numIterations
3829         : 2;
3830 
3831     if (unrolls == 0 || (final_ - initial) % unrolls)
3832     {
3833         if (log) printf("\tnot (divisible by %d)\n", unrolls);
3834         return false;
3835     }
3836 
3837     if (log) printf("Unrolling starting\n");
3838 
3839     // Double the increment
3840     eincrement.EV.E2.EV.Vllong *= unrolls;
3841     //printf("  4head:\t"); WReqn(l.Lhead.Belem); printf("\n");
3842 
3843     elem* e = null;
3844     foreach (i; 0 .. unrolls)
3845         e = el_combine(e, el_copytree(ehead));
3846 
3847     /* Walk e in execution order.
3848      * When eincrement is found, remove it.
3849      * Continue, replacing instances of `v` with `v+increment`
3850      * When last eincrement is found, stop.
3851      */
3852     unrollWalker(e, eincrement.Edef, v, increment, unrolls);
3853 
3854     l.Lhead.Belem = e;
3855 
3856     /* If unrolled loop would only execute once anyway, just remove the test at the end
3857      */
3858     if (initial + unrolls * increment == final_)
3859     {
3860         if (log) printf("\tcompletely unrolled\n");
3861         etail.Eoper = OPcomma;
3862         e2.EV.Vllong = 0;
3863         e2.Ety = etail.Ety;
3864     }
3865 
3866     //WRfunc();
3867     return true;
3868 }
3869 
3870 /******************************
3871  * Count number of elems in a tree
3872  * Params:
3873  *  e = tree
3874  * Returns:
3875  *  number of elems in tree
3876  */
3877 int el_length(elem *e)
3878 {
3879     int n = 0;
3880     while (e)
3881     {
3882         n += 1;
3883         if (!OTleaf(e.Eoper))
3884         {
3885             if (e.Eoper == OPctor || e.Eoper == OPdtor)
3886                 return 10_000;
3887             n += el_length(e.EV.E2);
3888             e = e.EV.E1;
3889         }
3890         else
3891             break;
3892     }
3893     return n;
3894 }
3895 
3896 
3897 }