1 /**
2  * Compiler implementation of the
3  * $(LINK2 http://www.dlang.org, D programming language).
4  *
5  * Copyright:   Copyright (C) 1985-1998 by Symantec
6  *              Copyright (C) 2000-2020 by The D Language Foundation, All Rights Reserved
7  * Authors:     $(LINK2 http://www.digitalmars.com, Walter Bright)
8  * License:     $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
9  * Source:      $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/backend/gloop.d, backend/gloop.d)
10  * Coverage:    https://codecov.io/gh/dlang/dmd/src/master/src/dmd/backend/gloop.d
11  */
12 
13 
14 module dmd.backend.gloop;
15 
16 version (SCPP)
17     version = COMPILE;
18 version (MARS)
19     version = COMPILE;
20 
21 version (COMPILE)
22 {
23 
24 import core.stdc.stdio;
25 import core.stdc.stdlib;
26 import core.stdc..string;
27 
28 import dmd.backend.cc;
29 import dmd.backend.cdef;
30 import dmd.backend.code_x86;
31 import dmd.backend.evalu8 : el_toldoubled;
32 import dmd.backend.oper;
33 import dmd.backend.global;
34 import dmd.backend.goh;
35 import dmd.backend.el;
36 import dmd.backend.outbuf;
37 import dmd.backend.ty;
38 import dmd.backend.type;
39 
40 import dmd.backend.dlist;
41 import dmd.backend.dvec;
42 import dmd.backend.mem;
43 
44 nothrow:
45 
46 char symbol_isintab(Symbol *s) { return sytab[s.Sclass] & SCSS; }
47 
48 extern (C++):
49 
50 bool findloopparameters(elem* erel, ref elem* rdeq, ref elem* rdinc);
51 
52 /*********************************
53  * Loop data structure.
54  */
55 
56 struct loop
57 {
58 nothrow:
59     loop *Lnext;        // Next loop in list (startloop -> start of list)
60     vec_t Lloop;        // Vector of blocks in this loop
61     vec_t Lexit;        // Vector of exit blocks of loop
62     block *Lhead;       // Pointer to header of loop
63     block *Ltail;       // Pointer to tail
64     block *Lpreheader;  // Pointer to preheader (if any)
65     list_t Llis;        // loop invariant elems moved to Lpreheader, so
66                         // redundant temporaries aren't created
67     Iv *Livlist;        // basic induction variables
68     Iv *Lopeqlist;      // list of other op= variables
69 
70     /***********************
71      * Write loop.
72      */
73 
74     void print()
75     {
76         debug
77         {
78             loop *l = &this;
79             printf("loop %p, next = %p\n",l,(l) ? l.Lnext : cast(loop *) null);
80             if (!l)
81                 return;
82             printf("\thead: B%d, tail: B%d, prehead: B%d\n",l.Lhead.Bdfoidx,
83                 l.Ltail.Bdfoidx,(l.Lpreheader ) ? l.Lpreheader.Bdfoidx
84                                                 : cast(uint)-1);
85             printf("\tLloop "); vec_println(l.Lloop);
86             printf("\tLexit "); vec_println(l.Lexit);
87         }
88     }
89 
90     /***************************
91      * Allocate loop.
92      */
93 
94     static loop *mycalloc()
95     {   loop *l;
96 
97         if (freelist)
98         {
99             l = freelist;
100             freelist = l.Lnext;
101             memset(l,0,loop.sizeof);
102         }
103         else
104             l = cast(loop *) mem_calloc(loop.sizeof);
105         return l;
106     }
107 
108     __gshared loop *freelist;
109 }
110 
111 struct famlist
112 {
113 nothrow:
114     elem **FLpelem;         /* parent of elem in the family         */
115     elem *c1;
116     elem *c2;               // c1*(basic IV) + c2
117     Symbol *FLtemp;         // symbol index of temporary (FLELIM if */
118                             /* this entry has no temporary)         */
119     tym_t FLty;             /* type of this induction variable      */
120     tym_t FLivty;           /* type of the basic IV elem (which is  */
121                             /* not necessarilly the type of the IV  */
122                             /* elem!)                               */
123     famlist *FLnext;        // next in list
124 
125     void print()
126     {
127         debug
128         {
129             printf("famlist:\n");
130             printf("*FLpelem:\n");
131             elem_print(*FLpelem);
132             printf("c1:");
133             elem_print(c1);
134             printf("c2:");
135             elem_print(c2);
136             printf("FLty = "); WRTYxx(FLty);
137             printf("\nFLivty = "); WRTYxx(FLivty);
138             printf("\n");
139         }
140     }
141 
142     /***************************
143      * Allocate famlist.
144      */
145 
146     static famlist *mycalloc()
147     {   famlist *fl;
148 
149         if (freelist)
150         {
151             fl = freelist;
152             freelist = fl.FLnext;
153             memset(fl,0,famlist.sizeof);
154         }
155         else
156             fl = cast(famlist *) mem_calloc(famlist.sizeof);
157         return fl;
158     }
159 
160     __gshared famlist *freelist;
161 }
162 
163 enum FLELIM = cast(Symbol *)-1;
164 
165 struct Iv
166 {
167 nothrow:
168     Symbol *IVbasic;        // symbol of basic IV
169     elem **IVincr;          // pointer to parent of IV increment elem
170     famlist *IVfamily;      // variables in this family
171     Iv *IVnext;             // next iv in list
172 
173     void print()
174     {
175         debug
176         {
177             printf("IV: '%s'\n",IVbasic.Sident.ptr);
178             printf("*IVincr:\n");
179             elem_print(*IVincr);
180         }
181     }
182 
183     /***************************
184      * Allocate Iv.
185      */
186 
187     static Iv *mycalloc()
188     {   Iv *iv;
189 
190         if (freelist)
191         {
192             iv = freelist;
193             freelist = iv.IVnext;
194             memset(iv,0,Iv.sizeof);
195         }
196         else
197             iv = cast(Iv *) mem_calloc(Iv.sizeof);
198         return iv;
199     }
200 
201     __gshared Iv *freelist;
202 }
203 
204 
205 private __gshared bool addblk;                    /* if TRUE, then we added a block */
206 
207 /* is elem loop invariant?      */
208 int isLI(elem *n) { return n.Nflags & NFLli; }
209 
210 /* make elem loop invariant     */
211 void makeLI(elem *n) { n.Nflags |= NFLli; }
212 
213 /******************************
214  *      Only variables that could only be unambiguously defined
215  *      are candidates for loop invariant removal and induction
216  *      variables.
217  *      This means only variables that have the SFLunambig flag
218  *      set for them.
219  *      Doing this will still cover 90% (I hope) of the cases, and
220  *      is a lot faster to compute.
221  */
222 
223 /*************
224  * Free loops.
225  */
226 
227 private void freeloop(loop **pl)
228 {
229     loop *ln;
230 
231     for (loop *l = *pl; l; l = ln)
232     {
233         ln = l.Lnext;
234         vec_free(l.Lloop);
235         vec_free(l.Lexit);
236         list_free(&l.Llis);
237         l.Lnext = loop.freelist;
238         loop.freelist = l;
239     }
240     *pl = null;
241 }
242 
243 
244 /**********************************
245  * Initialize block information.
246  * Returns:
247  *      !=0     contains BCasm block
248  */
249 
250 int blockinit()
251 {
252     bool hasasm = false;
253 
254     assert(dfo);
255     uint i = 0;
256     foreach (b; BlockRange(startblock))
257     {
258         debug                   /* check integrity of Bpred and Bsucc   */
259           L1:
260             foreach (blp; ListRange(b.Bpred))
261             {
262                 foreach (bls; ListRange(list_block(blp).Bsucc))
263                     if (list_block(bls) == b)
264                         continue L1;
265                 assert(0);
266             }
267 
268         ++i;
269         if (b.BC == BCasm)
270             hasasm = true;
271                                         /* compute number of blocks     */
272     }
273     assert(numblks == i && maxblks);
274     assert(i <= maxblks);
275     foreach (j, b; dfo[])
276     {
277         assert(b.Bdfoidx == j);
278         b.Bdom = vec_realloc(b.Bdom, maxblks); /* alloc Bdom vectors */
279         vec_clear(b.Bdom);
280     }
281     return hasasm;
282 }
283 
284 /****************************************
285  * Compute dominators (Bdom) for each block.
286  * See Aho & Ullman Fig. 13.5.
287  * Note that flow graph is reducible if there is only one
288  * pass through the loop.
289  * Input:
290  *      dfo[]
291  * Output:
292  *      fills in the Bdom vector for each block
293  */
294 
295 void compdom()
296 {
297     compdom(dfo[]);
298 }
299 
300 private extern (D) void compdom(block*[] dfo)
301 {
302     assert(dfo.length);
303     block* sb = dfo[0];                  // starting block
304 
305     vec_clear(sb.Bdom);
306     vec_setbit(0,sb.Bdom);               // starting block only doms itself
307     foreach (b; dfo)                     // for all except startblock
308     {
309         if (b != sb)
310             vec_set(b.Bdom);             // dominate all blocks
311     }
312 
313     vec_t t1 = vec_calloc(vec_numbits(sb.Bdom));       // allocate a temporary
314     uint cntr = 0;                       // # of times thru loop
315     bool chgs;
316     do
317     {
318         chgs = false;
319         foreach (i, b; dfo)              // for each block in dfo[]
320         {
321             if (i == 0)
322                 continue;                // except startblock
323             if (b.Bpred)                 // if there are predecessors
324             {
325                 vec_set(t1);
326                 foreach (bl; ListRange(b.Bpred))
327                 {
328                     vec_andass(t1,list_block(bl).Bdom);
329                 }
330             }
331             else
332                 vec_clear(t1);      // no predecessors to dominate
333             vec_setbit(i,t1);       // each block doms itself
334             if (chgs)
335                 vec_copy(b.Bdom,t1);
336             else if (!vec_equal(b.Bdom,t1))   // if any changes
337             {
338                 vec_copy(b.Bdom,t1);
339                 chgs = true;
340             }
341         }
342         cntr++;
343         assert(cntr < 50);              // should have converged by now
344     } while (chgs);
345     vec_free(t1);
346 
347     debug if (debugc)
348     {
349         printf("Flow graph is%s reducible\n", cntr <= 2 ? "".ptr : " not".ptr);
350     }
351 }
352 
353 /***************************
354  * Return !=0 if block A dominates block B.
355  */
356 
357 bool dom(block *A,block *B)
358 {
359     assert(A && B && dfo && dfo[A.Bdfoidx] == A);
360     return vec_testbit(A.Bdfoidx,B.Bdom) != 0;
361 }
362 
363 /**********************
364  * Find all the loops.
365  */
366 
367 private extern (D) void findloops(block*[] dfo, loop **ploops)
368 {
369     freeloop(ploops);
370 
371     //printf("findloops()\n");
372     foreach (b; dfo)
373         b.Bweight = 1;             // reset Bweights
374     foreach_reverse (b; dfo)       // for each block (note reverse
375                                    // dfo order, so most nested
376                                    // loops are found first)
377     {
378         assert(b);
379         foreach (bl; ListRange(b.Bsucc))
380         {
381             block *s = list_block(bl);      // each successor s to b
382             assert(s);
383             if (dom(s,b))                   // if s dominates b
384                 buildloop(ploops,s,b);      // we found a loop
385         }
386     }
387 
388     debug if (debugc)
389     {
390         for (loop *l = *ploops; l; l = l.Lnext)
391             l.print();
392     }
393 }
394 
395 /********************************
396  */
397 
398 private uint loop_weight(uint weight, int factor) pure
399 {
400     // Be careful not to overflow
401     if (weight < 0x1_0000)
402         weight *= 10 * factor;
403     else if (weight < 0x10_0000)
404         weight *= 2 * factor;
405     else
406         weight += factor;
407     assert(cast(int)weight > 0);
408     return weight;
409 }
410 
411 /*****************************
412  * Construct natural loop.
413  * Algorithm 13.1 from Aho & Ullman.
414  * Note that head dom tail.
415  */
416 
417 private void buildloop(loop **ploops,block *head,block *tail)
418 {
419     loop *l;
420 
421     //printf("buildloop()\n");
422     /* See if this is part of an existing loop. If so, merge the two.     */
423     for (l = *ploops; l; l = l.Lnext)
424         if (l.Lhead == head)           /* two loops with same header   */
425         {
426             vec_t v;
427 
428             // Calculate loop contents separately so we get the Bweights
429             // done accurately.
430 
431             v = vec_calloc(maxblks);
432             vec_setbit(head.Bdfoidx,v);
433             head.Bweight = loop_weight(head.Bweight, 1);
434             insert(tail,v);
435 
436             vec_orass(l.Lloop,v);      // merge into existing loop
437             vec_free(v);
438 
439             vec_clear(l.Lexit);        // recompute exit blocks
440             goto L1;
441         }
442 
443     /* Allocate loop entry        */
444     l = loop.mycalloc();
445     l.Lnext = *ploops;
446     *ploops = l;                         // put l at beginning of list
447 
448     l.Lloop = vec_calloc(maxblks);       /* allocate loop bit vector     */
449     l.Lexit = vec_calloc(maxblks);       /* bit vector for exit blocks   */
450     l.Lhead = head;
451     l.Ltail = tail;
452 
453     vec_setbit(head.Bdfoidx,l.Lloop);    /* add head to the loop         */
454     head.Bweight = loop_weight(head.Bweight, 2);  // *20 usage for loop header
455 
456     insert(tail,l.Lloop);                /* insert tail in loop          */
457 
458 L1:
459     /* Find all the exit blocks (those blocks with
460      * successors outside the loop).
461      */
462 
463     uint i;
464     for (i = 0; (i = cast(uint) vec_index(i, l.Lloop)) < dfo.length; ++i)  // for each block in this loop
465     {
466         if (dfo[i].BC == BCret || dfo[i].BC == BCretexp || dfo[i].BC == BCexit)
467             vec_setbit(i,l.Lexit); /* ret blocks are exit blocks */
468         else
469         {
470             foreach (bl; ListRange(dfo[i].Bsucc))
471                 if (!vec_testbit(list_block(bl).Bdfoidx,l.Lloop))
472                 {
473                     vec_setbit(i,l.Lexit);
474                     break;
475                 }
476         }
477     }
478 
479     /*  Find preheader, if any, to the loop.
480         The preheader is a block that has only the head as a successor.
481         All other predecessors of head must be inside the loop.
482      */
483     l.Lpreheader = null;
484     foreach (bl; ListRange(head.Bpred))
485     {
486         block *b = list_block(bl);
487 
488         if (!vec_testbit(b.Bdfoidx,l.Lloop))  /* if not in loop       */
489         {
490             if (l.Lpreheader)                 /* if already one       */
491             {
492                 l.Lpreheader = null;          /* can only be one      */
493                 break;
494             }
495             else
496             {
497                 if (list_next(b.Bsucc))       // if more than 1 successor
498                     break;                    // b can't be a preheader
499                 l.Lpreheader = b;
500             }
501         }
502     }
503 }
504 
505 /********************************
506  * Support routine for buildloop().
507  * Add a block b and all its predecessors to loop lv.
508  */
509 
510 private void insert(block *b, vec_t lv)
511 {
512     assert(b && lv);
513     if (!vec_testbit(b.Bdfoidx,lv))     /* if block is not in loop      */
514     {
515         vec_setbit(b.Bdfoidx,lv);       /* add block to loop            */
516         b.Bweight = loop_weight(b.Bweight,1);   // *10 usage count
517         foreach (bl; ListRange(b.Bpred))
518             insert(list_block(bl),lv);  /* insert all its predecessors  */
519     }
520 }
521 
522 /**************************************
523  * Perform loop rotations.
524  * Loop starts as:
525  *
526  *         prehead
527  *          |
528  *          v
529  *      +->head---->
530  *      |   |
531  *      |   v
532  *      |  body
533  *      |   |
534  *      |   v
535  *      +--tail
536  *
537  * Two types are done:
538  *      1) Header is moved to be past the tail.
539  *
540  *         prehead
541  *          |
542  *      +---+
543  *      |
544  *      |  body<-+
545  *      |   |    |
546  *      |   v    |
547  *      |  tail  |
548  *      |   |    |
549  *      |   v    |
550  *      +->head--+
551  *          |
552  *          v
553  *
554  *      2) Header is copied past the tail (done only if MFtime is set).
555  *
556  *         prehead
557  *          |
558  *          v
559  *         head1-----+
560  *          |        |
561  *          v        |
562  *         body<--+  |
563  *          |     |  |
564  *          v     |  |
565  *         tail   |  |
566  *          |     |  |
567  *          v     |  |
568  *         head2--+  |
569  *          |        |
570  *          +--------+
571  *          v
572  *
573  * Input:
574  *      Loop information (do not depend on the preheader information)
575  * Output:
576  *      Revised list of blocks, a new dfo and new loop information
577  * Returns:
578  *      true need to recompute loop data
579  */
580 
581 private int looprotate(loop *l)
582 {
583     block *tail = l.Ltail;
584     block *head = l.Lhead;
585 
586     //printf("looprotate(%p)\n",l);
587 
588     // Do not rotate loop if:
589     if (head == tail ||                         // loop is only one block big
590         !vec_testbit(head.Bdfoidx,l.Lexit))   // header is not an exit block
591         goto Lret;
592 
593     if (//iter != 1 &&
594         vec_testbit(tail.Bdfoidx,l.Lexit))    // tail is an exit block
595         goto Lret;
596 
597     // Do not rotate if already rotated
598     foreach (b; BlockRange(tail.Bnext))
599         if (b == head)                  // if loop already rotated
600             goto Lret;
601 
602     if (head.BC == BCtry)
603          goto Lret;
604     if (head.BC == BC_try)
605          goto Lret;
606 
607     //if (debugc) { printf("looprotate: "); l.print(); }
608 
609     if ((go.mfoptim & MFtime) && head.BC != BCswitch && head.BC != BCasm)
610     {   // Duplicate the header past the tail (but doing
611         // switches would be too expensive in terms of code
612         // generated).
613            block *head2;
614            list_t *pbl2;
615            list_t *pbln;
616 
617         head2 = block_calloc(); // create new head block
618         numblks++;                      // number of blocks in existence
619         head2.Btry = head.Btry;
620         head2.Bflags = head.Bflags;
621         head.Bflags = BFLnomerg;       // move flags over to head2
622         head2.Bflags |= BFLnomerg;
623         head2.BC = head.BC;
624         assert(head2.BC != BCswitch);
625         if (head.Belem)                // copy expression tree
626             head2.Belem = el_copytree(head.Belem);
627         head2.Bnext = tail.Bnext;
628         tail.Bnext = head2;
629 
630         // pred(head1) = pred(head) outside loop
631         // pred(head2) = pred(head) inside loop
632         pbl2 = &(head2.Bpred);
633         for (list_t *pbl = &(head.Bpred); *pbl; pbl = pbln)
634         {
635             if (vec_testbit(list_block(*pbl).Bdfoidx, l.Lloop))
636             {   // if this predecessor is inside the loop
637 
638                 *pbl2 = *pbl;
639                 *pbl = list_next(*pbl);
640                 pbln = pbl;                     // don't skip this next one
641                 (*pbl2).next = null;
642                 auto bsucc = list_block(*pbl2).Bsucc;
643                 pbl2 = &((*pbl2).next);
644                 foreach (bl; ListRange(bsucc))
645                     if (list_block(bl) == head)
646                     {
647                         bl.ptr = cast(void *)head2;
648                         goto L2;
649                     }
650                 assert(0);
651         L2:
652             }
653             else
654                 pbln = &((*pbl).next);      // next predecessor in list
655         } // for each pred(head)
656 
657         // succ(head2) = succ(head)
658         foreach (bl; ListRange(head.Bsucc))
659         {
660             list_append(&(head2.Bsucc),list_block(bl));
661             list_append(&(list_block(bl).Bpred),head2);
662         }
663         if (debugc) printf("1Rotated loop %p\n", l);
664         go.changes++;
665         return true;
666     }
667     else if (startblock != head
668             /* This screws up the OPctor/OPdtor sequence for:
669              *   struct CString
670              *   {   CString();
671              *      ~CString();
672              *      int GetLength();
673              *   };
674              *
675              *   void f(void)
676              *   {  for(;;)
677              *      {   CString s ;
678              *    if(s.GetLength()!=0)
679              *       break ;
680              *      }
681              *   }
682              */
683             && !(config.flags3 & CFG3eh)
684             )
685     {   // optimize for space
686         // Simply position the header past the tail
687         foreach (b; BlockRange(startblock))
688         {
689             if (b.Bnext == head)
690             {   // found parent b of head
691                 b.Bnext = head.Bnext;
692                 head.Bnext = tail.Bnext;
693                 tail.Bnext = head;
694                 if (debugc) printf("2Rotated loop %p\n", l);
695                 go.changes++;
696                 return false;
697             }
698         }
699         assert(0);
700     }
701 Lret:
702     return false;
703 }
704 
705 private __gshared
706 {
707     int gref;                // parameter for markinvar()
708     block *gblock;           // parameter for markinvar()
709     vec_t lv;                // parameter for markinvar()
710     vec_t gin;               // parameter for markinvar()
711     bool doflow;             // true if flow analysis has to be redone
712 }
713 
714 /*********************************
715  * Loop invariant and induction variable elimination.
716  * Input:
717  *      iter    which optimization iteration we are on
718  */
719 
720 void loopopt()
721 {
722     loop *ln;
723     vec_t rd;
724     loop *startloop;
725 
726     if (debugc) printf("loopopt()\n");
727     startloop = null;
728 restart:
729     file_progress();
730     if (blockinit())                    // init block data
731     {
732         findloops(dfo[], &startloop);          // Compute Bweights
733         freeloop(&startloop);           // free existing loops
734         return;                         // can't handle ASM blocks
735     }
736     compdom();                          // compute dominators
737     findloops(dfo[], &startloop);              // find the loops
738 
739     for (loop *l = startloop; l; l = ln)
740     {
741         ln = l.Lnext;
742         if (looprotate(l))              // rotate the loop
743         {
744             compdfo();
745             blockinit();
746             compdom();
747             findloops(dfo[], &startloop);      // may trash l.Lnext
748             if (ln)
749             {   ln = startloop;         // start over
750                 file_progress();
751             }
752         }
753     }
754     // Make sure there is a preheader for each loop.
755 
756     addblk = false;                     /* assume no blocks added        */
757     for (loop *l = startloop; l; l = l.Lnext) // for each loop
758     {
759         //if (debugc) l.print();
760 
761         if (!l.Lpreheader)             /* if no preheader               */
762         {
763             block *h;
764             block *p;
765 
766             if (debugc) printf("Generating preheader for loop\n");
767             addblk = true;              /* add one                       */
768             p = block_calloc();         // the preheader
769             numblks++;
770             assert (numblks <= maxblks);
771             h = l.Lhead;               /* loop header                   */
772 
773             /* Find parent of h */
774             if (h == startblock)
775                 startblock = p;
776             else
777             {
778                 block *ph;
779                 for (ph = startblock; 1; ph = ph.Bnext)
780                 {
781                     assert(ph);         /* should have found it         */
782                     if (ph.Bnext == h)
783                         break;
784                 }
785                 /* Link p into block list between ph and h      */
786                 ph.Bnext = p;
787             }
788             p.Bnext = h;
789 
790             l.Lpreheader = p;
791             p.BC = BCgoto;
792             assert(p.Bsucc == null);
793             list_append(&(p.Bsucc),h); /* only successor is h          */
794             p.Btry = h.Btry;
795 
796             if (debugc) printf("Adding preheader %p to loop %p\n",p,l);
797 
798             // Move preds of h that aren't in the loop to preds of p
799             for (list_t bl = h.Bpred; bl;)
800             {
801                 block *b = list_block(bl);
802 
803                 if (!vec_testbit (b.Bdfoidx, l.Lloop))
804                 {
805                     list_append(&(p.Bpred), b);
806                     list_subtract(&(h.Bpred), b);
807                     bl = h.Bpred;      /* dunno what subtract did      */
808 
809                     /* Fix up successors of predecessors        */
810                     foreach (bls; ListRange(b.Bsucc))
811                         if (list_block(bls) == h)
812                                 bls.ptr = cast(void *)p;
813                 }
814                 else
815                     bl = list_next(bl);
816             }
817             list_append(&(h.Bpred),p); /* p is a predecessor to h      */
818         }
819     } /* for */
820     if (addblk)                         /* if any blocks were added      */
821     {
822         compdfo();                      /* compute depth-first order    */
823         blockinit();
824         compdom();
825         findloops(dfo[], &startloop);          // recompute block info
826         addblk = false;
827     }
828 
829     /* Do the loop optimizations. Note that accessing the loops */
830     /* starting from startloop will access them in least nested */
831     /* one first, thus moving LIs out as far as possible.       */
832 
833     doflow = true;                      /* do flow analysis             */
834 
835     if (go.mfoptim & MFtime)
836     {
837         if (debugc) printf("Starting loop unrolling\n");
838         for (loop *l = startloop; l; l = ln)
839         {
840             ln = l.Lnext;
841             if (loopunroll(l))
842             {
843                 compdfo();                      // compute depth-first order
844                 blockinit();
845                 compdom();
846                 findloops(dfo[], &startloop);          // recompute block info
847                 doflow = true;
848                 if (ln)
849                 {   ln = startloop;         // start over
850                     file_progress();
851                 }
852             }
853         }
854     }
855 
856     if (debugc) printf("Starting loop invariants\n");
857 
858     for (loop *l = startloop; l; l = l.Lnext)
859     {
860         uint i,j;
861 
862         //if (debugc) l.print();
863 
864         file_progress();
865         assert(l.Lpreheader);
866         if (doflow)
867         {
868             flowrd();               /* compute reaching definitions  */
869             flowlv();               /* compute live variables        */
870             flowae();               // compute available expressions
871             doflow = false;         /* no need to redo it           */
872             if (go.defnod.length == 0)     /* if no definition elems       */
873                 break;              /* no need to optimize          */
874         }
875         lv = l.Lloop;
876         if (debugc) printf("...Loop %p start...\n",l);
877 
878         /* Unmark all elems in this loop         */
879         for (i = 0; (i = cast(uint) vec_index(i, lv)) < dfo.length; ++i)
880             if (dfo[i].Belem)
881                 unmarkall(dfo[i].Belem);       /* unmark all elems     */
882 
883         /* Find & mark all LIs   */
884         gin = vec_clone(l.Lpreheader.Bout);
885         rd = vec_calloc(go.defnod.length);        /* allocate our running RD vector */
886         for (i = 0; (i = cast(uint) vec_index(i, lv)) < dfo.length; ++i) // for each block in loop
887         {
888             block *b = dfo[i];
889 
890             if (debugc) printf("B%d\n",i);
891             if (b.Belem)
892             {
893                 vec_copy(rd, b.Binrd); // IN reaching defs
894                 static if (0)
895                 {
896                     printf("i = %d\n",i);
897                     {
898                         for (int j = 0; j < go.defnod.length; j++)
899                             elem_print(go.defnod[j].DNelem);
900                     }
901                     printf("rd    : "); vec_println(rd);
902                 }
903                 gblock = b;
904                 gref = 0;
905                 if (b != l.Lhead)
906                     gref = 1;
907                 markinvar(b.Belem, rd);
908                 static if (0)
909                 {
910                     printf("B%d\n", i);
911                     {
912                         foreach (j; 0 .. go.defnod.length)
913                         {
914                             printf("  [%2d] ", j);
915                             WReqn(go.defnod[j].DNelem);
916                             printf("\n");
917                         }
918                     }
919                     printf("rd    : "); vec_println(rd);
920                     printf("Boutrd: "); vec_println(b.Boutrd);
921                 }
922                 assert(vec_equal(rd, b.Boutrd));
923             }
924             else
925                 assert(vec_equal(b.Binrd, b.Boutrd));
926         }
927         vec_free(rd);
928         vec_free(gin);
929 
930         /* Move loop invariants  */
931         for (i = 0; (i = cast(uint) vec_index(i, lv)) < dfo.length; ++i)
932         {
933             int domexit;                // true if this block dominates all
934                                         // exit blocks of the loop
935 
936             for (j = 0; (j = cast(uint) vec_index(j, l.Lexit)) < dfo.length; ++j) // for each exit block
937             {
938                 if (!vec_testbit (i, dfo[j].Bdom))
939                 {
940                     domexit = 0;
941                     goto L1;                // break if !(i dom j)
942                 }
943             }
944             // if i dom (all exit blocks)
945             domexit = 1;
946         L1:
947             if (dfo[i].Belem)
948             {   // If there is any hope of making an improvement
949                 if (domexit || l.Llis)
950                 {
951                     //if (dfo[i] != l.Lhead)
952                         //domexit |= 2;
953                     movelis(dfo[i].Belem, dfo[i], l, &domexit);
954                 }
955             }
956         }
957         //list_free(&l.Llis,FPnull);
958         if (debugc) printf("...Loop %p done...\n",l);
959 
960         if (go.mfoptim & MFliv)
961         {
962             loopiv(l);              /* induction variables          */
963             if (addblk)             /* if we added a block          */
964             {
965                 compdfo();
966                 goto restart;       /* play it safe and start over  */
967             }
968         }
969     } /* for */
970     freeloop(&startloop);
971 }
972 
973 /*****************************
974  * If elem is loop invariant, mark it.
975  * Input:
976  *      lv =    vector of all the blocks in this loop.
977  *      rd =    vector of loop invariants for this elem. This must be
978  *              continually updated.
979  * Note that we do not iterate until no more LIs are found. The only
980  * thing this would buy us is stuff that depends on LI assignments.
981  */
982 
983 private void markinvar(elem *n,vec_t rd)
984 {
985     vec_t tmp;
986     uint i;
987     Symbol *v;
988     elem *n1;
989 
990     assert(n && rd);
991     assert(vec_numbits(rd) == go.defnod.length);
992     switch (n.Eoper)
993     {
994         case OPaddass:  case OPminass:  case OPmulass:  case OPandass:
995         case OPorass:   case OPxorass:  case OPdivass:  case OPmodass:
996         case OPshlass:  case OPshrass:  case OPashrass:
997         case OPpostinc: case OPpostdec:
998         case OPcall:
999         case OPvecsto:
1000         case OPcmpxchg:
1001             markinvar(n.EV.E2,rd);
1002             goto case OPnegass;
1003 
1004         case OPnegass:
1005             n1 = n.EV.E1;
1006             if (n1.Eoper == OPind)
1007                     markinvar(n1.EV.E1,rd);
1008             else if (OTbinary(n1.Eoper))
1009             {   markinvar(n1.EV.E1,rd);
1010                 markinvar(n1.EV.E2,rd);
1011             }
1012         L2:
1013             if (n.Eoper == OPcall ||
1014                 gblock.Btry ||
1015                 !(n1.Eoper == OPvar &&
1016                     symbol_isintab(n1.EV.Vsym)))
1017             {
1018                 gref = 1;
1019             }
1020 
1021             updaterd(n,rd,null);
1022             break;
1023 
1024         case OPcallns:
1025             markinvar(n.EV.E2,rd);
1026             markinvar(n.EV.E1,rd);
1027             break;
1028 
1029         case OPstrcpy:
1030         case OPstrcat:
1031         case OPmemcpy:
1032         case OPmemset:
1033             markinvar(n.EV.E2,rd);
1034             markinvar(n.EV.E1,rd);
1035             updaterd(n,rd,null);
1036             break;
1037 
1038         case OPbtc:
1039         case OPbtr:
1040         case OPbts:
1041             markinvar(n.EV.E1,rd);
1042             markinvar(n.EV.E2,rd);
1043             updaterd(n,rd,null);
1044             break;
1045 
1046         case OPucall:
1047             markinvar(n.EV.E1,rd);
1048             goto case OPasm;
1049 
1050         case OPasm:
1051             gref = 1;
1052             updaterd(n,rd,null);
1053             break;
1054 
1055         case OPucallns:
1056         case OPstrpar:
1057         case OPstrctor:
1058         case OPvector:
1059         case OPvoid:
1060         case OPstrlen:
1061         case OPddtor:
1062         case OPinp:
1063         case OPprefetch:                // don't mark E2
1064             markinvar(n.EV.E1,rd);
1065             break;
1066 
1067         case OPcond:
1068         case OPparam:
1069         case OPstrcmp:
1070         case OPmemcmp:
1071         case OPbt:                      // OPbt is like OPind, assume not LI
1072         case OPoutp:
1073             markinvar(n.EV.E1,rd);
1074             markinvar(n.EV.E2,rd);
1075             break;
1076 
1077         case OPandand:
1078         case OPoror:
1079             markinvar(n.EV.E1,rd);
1080             tmp = vec_clone(rd);
1081             markinvar(n.EV.E2,tmp);
1082             if (el_returns(n.EV.E2))
1083                 vec_orass(rd,tmp);              // rd |= tmp
1084             vec_free(tmp);
1085             break;
1086 
1087         case OPcolon:
1088         case OPcolon2:
1089             tmp = vec_clone(rd);
1090             switch (el_returns(n.EV.E1) * 2 | int(el_returns(n.EV.E2)))
1091             {
1092                 case 3: // E1 and E2 return
1093                     markinvar(n.EV.E1,rd);
1094                     markinvar(n.EV.E2,tmp);
1095                     vec_orass(rd,tmp);              // rd |= tmp
1096                     break;
1097                 case 2: // E1 returns
1098                     markinvar(n.EV.E1,rd);
1099                     markinvar(n.EV.E2,tmp);
1100                     break;
1101                 case 1: // E2 returns
1102                     markinvar(n.EV.E1,tmp);
1103                     markinvar(n.EV.E2,rd);
1104                     break;
1105                 case 0: // neither returns
1106                     markinvar(n.EV.E1,tmp);
1107                     vec_copy(tmp,rd);
1108                     markinvar(n.EV.E2,tmp);
1109                     break;
1110                 default:
1111                     assert(0);
1112             }
1113             vec_free(tmp);
1114             break;
1115 
1116         case OPaddr:            // mark addresses of OPvars as LI
1117             markinvar(n.EV.E1,rd);
1118             if (n.EV.E1.Eoper == OPvar || isLI(n.EV.E1))
1119                 makeLI(n);
1120             break;
1121 
1122         case OPmsw:
1123         case OPneg:     case OPbool:    case OPnot:     case OPcom:
1124         case OPs16_32:  case OPd_s32:   case OPs32_d:
1125         case OPd_s16:   case OPs16_d:   case OPd_f:     case OPf_d:
1126         case OP32_16:   case OPu8_16:
1127         case OPld_d:    case OPd_ld:
1128         case OPld_u64:
1129         case OPc_r:     case OPc_i:
1130         case OPu16_32:
1131         case OPu16_d:   case OPd_u16:
1132         case OPs8_16:   case OP16_8:
1133         case OPd_u32:   case OPu32_d:
1134 
1135         case OPs32_64:  case OPu32_64:
1136         case OP64_32:
1137         case OPd_s64:   case OPd_u64:
1138         case OPs64_d:
1139         case OPu64_d:
1140         case OP128_64:
1141         case OPs64_128:
1142         case OPu64_128:
1143 
1144         case OPabs:
1145         case OPrndtol:
1146         case OPrint:
1147         case OPsetjmp:
1148         case OPbsf:
1149         case OPbsr:
1150         case OPbswap:
1151         case OPpopcnt:
1152         case OPsqrt:
1153         case OPsin:
1154         case OPcos:
1155         case OPvp_fp: /* BUG for MacHandles */
1156         case OPnp_f16p: case OPf16p_np: case OPoffset: case OPnp_fp:
1157         case OPcvp_fp:
1158         case OPvecfill:
1159             markinvar(n.EV.E1,rd);
1160             if (isLI(n.EV.E1))        /* if child is LI               */
1161                 makeLI(n);
1162             break;
1163 
1164         case OPeq:
1165         case OPstreq:
1166             markinvar(n.EV.E2,rd);
1167             n1 = n.EV.E1;
1168             markinvar(n1,rd);
1169 
1170             /* Determine if assignment is LI. Conditions are:       */
1171             /* 1) Rvalue is LI                                      */
1172             /* 2) Lvalue is a variable (simplifies things a lot)    */
1173             /* 3) Lvalue can only be affected by unambiguous defs   */
1174             /* 4) No rd's of lvalue that are within the loop (other */
1175             /*    than the current def)                             */
1176             if (isLI(n.EV.E2) && n1.Eoper == OPvar)          /* 1 & 2 */
1177             {
1178                 v = n1.EV.Vsym;
1179                 if (v.Sflags & SFLunambig)
1180                 {
1181                     tmp = vec_calloc(go.defnod.length);
1182                     //filterrd(tmp,rd,v);
1183                     listrds(rd,n1,tmp);
1184                     for (i = 0; (i = cast(uint) vec_index(i, tmp)) < go.defnod.length; ++i)
1185                         if (go.defnod[i].DNelem != n &&
1186                             vec_testbit(go.defnod[i].DNblock.Bdfoidx,lv))
1187                                 goto L3;
1188                     makeLI(n);      // then the def is LI
1189                 L3: vec_free(tmp);
1190                 }
1191             }
1192             goto L2;
1193 
1194         case OPadd:     case OPmin:     case OPmul:     case OPand:
1195         case OPor:      case OPxor:     case OPdiv:     case OPmod:
1196         case OPshl:     case OPshr:     case OPeqeq:    case OPne:
1197         case OPlt:      case OPle:      case OPgt:      case OPge:
1198         case OPashr:
1199         case OPror:     case OProl:
1200         case OPbtst:
1201 
1202         case OPunord:   case OPlg:      case OPleg:     case OPule:
1203         case OPul:      case OPuge:     case OPug:      case OPue:
1204         case OPngt:     case OPnge:     case OPnlt:     case OPnle:
1205         case OPord:     case OPnlg:     case OPnleg:    case OPnule:
1206         case OPnul:     case OPnuge:    case OPnug:     case OPnue:
1207 
1208         case OPcomma:
1209         case OPpair:
1210         case OPrpair:
1211         case OPremquo:
1212         case OPscale:
1213         case OPyl2x:
1214         case OPyl2xp1:
1215             markinvar(n.EV.E1,rd);
1216             markinvar(n.EV.E2,rd);
1217             if (isLI(n.EV.E2) && isLI(n.EV.E1))
1218                     makeLI(n);
1219             break;
1220 
1221         case OPind:                     /* must assume this is not LI   */
1222             markinvar(n.EV.E1,rd);
1223             if (isLI(n.EV.E1))
1224             {
1225                 static if (0)
1226                 {
1227                     // This doesn't work with C++, because exp2_ptrtocomtype() will
1228                     // transfer const to where it doesn't belong.
1229                     if (n.Ety & mTYconst)
1230                     {
1231                         makeLI(n);
1232                     }
1233 
1234                     // This was disabled because it was marking as LI
1235                     // the loop dimension for the [i] array if
1236                     // a[j][i] was in a loop. This meant the a[j] array bounds
1237                     // check for the a[j].length was skipped.
1238                     else if (n.Ejty)
1239                     {
1240                         tmp = vec_calloc(go.defnod.length);
1241                         filterrdind(tmp,rd,n);  // only the RDs pertaining to n
1242 
1243                         // if (no RDs within loop)
1244                         //      then it's loop invariant
1245 
1246                         for (i = 0; (i = cast(uint) vec_index(i, tmp)) < go.defnod.length; ++i)  // for each RD
1247                             if (vec_testbit(go.defnod[i].DNblock.Bdfoidx,lv))
1248                                 goto L10;       // found a RD in the loop
1249 
1250                         // If gref has occurred, this can still be LI
1251                         // if n is an AE that was also an AE at the
1252                         // point of gref.
1253                         // We can catch a subset of these cases by looking
1254                         // at the AEs at the start of the loop.
1255                         if (gref)
1256                         {
1257                             int j;
1258 
1259                             //printf("\tn is: "); WReqn(n); printf("\n");
1260                             for (j = 0; (j = cast(uint) vec_index(j, gin)) < go.exptop; ++j)
1261                             {
1262                                 elem *e = go.expnod[j];
1263 
1264                                 //printf("\t\tgo.expnod[%d] = %p\n",j,e);
1265                                 //printf("\t\tAE is: "); WReqn(e); printf("\n");
1266                                 if (el_match2(n,e))
1267                                 {
1268                                     makeLI(n);
1269                                     //printf("Ind LI: "); WReqn(n); printf("\n");
1270                                     break;
1271                                 }
1272                             }
1273                         }
1274                         else
1275                             makeLI(n);
1276                   L10:
1277                         vec_free(tmp);
1278                         break;
1279                     }
1280                 }
1281             }
1282             break;
1283 
1284         case OPvar:
1285             v = n.EV.Vsym;
1286             if (v.Sflags & SFLunambig)     // must be unambiguous to be LI
1287             {
1288                 tmp = vec_calloc(go.defnod.length);
1289                 //filterrd(tmp,rd,v);       // only the RDs pertaining to v
1290                 listrds(rd,n,tmp);  // only the RDs pertaining to v
1291 
1292                 // if (no RDs within loop)
1293                 //  then it's loop invariant
1294 
1295                 for (i = 0; (i = cast(uint) vec_index(i, tmp)) < go.defnod.length; ++i)  // for each RD
1296                     if (vec_testbit(go.defnod[i].DNblock.Bdfoidx,lv))
1297                         goto L1;    // found a RD in the loop
1298                 makeLI(n);
1299 
1300             L1: vec_free(tmp);
1301             }
1302             break;
1303 
1304         case OPstring:
1305         case OPrelconst:
1306         case OPconst:                   /* constants are always LI      */
1307         case OPframeptr:
1308             makeLI(n);
1309             break;
1310 
1311         case OPinfo:
1312             markinvar(n.EV.E2,rd);
1313             break;
1314 
1315         case OPstrthis:
1316         case OPmark:
1317         case OPctor:
1318         case OPdtor:
1319         case OPdctor:
1320         case OPhalt:
1321         case OPgot:                     // shouldn't OPgot be makeLI ?
1322             break;
1323 
1324         default:
1325             WROP(n.Eoper);
1326             //printf("n.Eoper = %d, OPconst = %d\n", n.Eoper, OPconst);
1327             assert(0);
1328     }
1329 
1330     debug
1331     {
1332         if (debugc && isLI(n))
1333         {
1334             printf("  LI elem: ");
1335             WReqn(n);
1336             printf("\n");
1337         }
1338     }
1339 }
1340 
1341 /**************************************
1342  * Fill in the DefNode.DNumambig vector.
1343  * Set bits defnod[] indices for entries
1344  * which are completely destroyed when e is
1345  * unambiguously assigned to.
1346  * Params:
1347  *      v = vector to fill in
1348  *      e = defnod[] entry that is an assignment to a variable
1349  */
1350 
1351 extern (C)
1352 {
1353 void fillInDNunambig(vec_t v, elem *e)
1354 {
1355     assert(OTassign(e.Eoper));
1356     elem *t = e.EV.E1;
1357     assert(t.Eoper == OPvar);
1358     Symbol *d = t.EV.Vsym;
1359 
1360     targ_size_t toff = t.EV.Voffset;
1361     targ_size_t tsize = (e.Eoper == OPstreq) ? type_size(e.ET) : tysize(t.Ety);
1362     targ_size_t ttop = toff + tsize;
1363 
1364     //printf("updaterd: "); WReqn(n); printf(" toff=%d, tsize=%d\n", toff, tsize);
1365 
1366 
1367     // for all unambig defs in go.defnod[]
1368     foreach (const i; 0 .. go.defnod.length)
1369     {
1370         elem *tn = go.defnod[i].DNelem;
1371         elem *tn1;
1372         targ_size_t tn1size;
1373 
1374         if (!OTassign(tn.Eoper))
1375             continue;
1376 
1377         // If def of same variable, kill that def
1378         tn1 = tn.EV.E1;
1379         if (tn1.Eoper != OPvar || d != tn1.EV.Vsym)
1380             continue;
1381 
1382         // If t completely overlaps tn1
1383         tn1size = (tn.Eoper == OPstreq)
1384             ? type_size(tn.ET) : tysize(tn1.Ety);
1385         if (toff <= tn1.EV.Voffset &&
1386             tn1.EV.Voffset + tn1size <= ttop)
1387         {
1388             vec_setbit(cast(uint)i, v);
1389         }
1390     }
1391 }
1392 }
1393 
1394 /********************
1395  * Update rd vector.
1396  * Input:
1397  *      n       assignment elem or function call elem or OPasm elem
1398  *      rd      reaching def vector to update
1399  *              (clear bits for defs we kill, set bit for n (which is the
1400  *               def we are genning))
1401  *      vecdim  go.defnod.length
1402  */
1403 
1404 extern (C) {
1405 void updaterd(elem *n,vec_t GEN,vec_t KILL)
1406 {
1407     const op = n.Eoper;
1408     elem *t;
1409 
1410     assert(OTdef(op));
1411     assert(GEN);
1412     elem_debug(n);
1413 
1414     uint ni = n.Edef;
1415     assert(ni != -1);
1416 
1417     // If unambiguous def
1418     if (OTassign(op) && (t = n.EV.E1).Eoper == OPvar)
1419     {
1420         vec_t v = go.defnod[ni].DNunambig;
1421         assert(v);
1422         if (KILL)
1423             vec_orass(KILL, v);
1424         vec_subass(GEN, v);
1425     }
1426     else
1427     {
1428         static if (0)
1429         {
1430             if (OTassign(op) && t.Eoper != OPvar && t.Ejty)
1431             {
1432                 // for all unambig defs in go.defnod[]
1433                 foreach (uint i; 0 .. go.defnod.length)
1434                 {
1435                     elem *tn = go.defnod[i].DNelem;
1436                     elem *tn1;
1437 
1438                     if (tn == n)
1439                         ni = i;
1440 
1441                     if (!OTassign(tn.Eoper))
1442                         continue;
1443 
1444                     // If def of same variable, kill that def
1445                     tn1 = tn.EV.E1;
1446                     if (tn1.Eoper != OPind || t.Ejty != tn1.Ejty)
1447                         continue;
1448 
1449                     if (KILL)
1450                         vec_setbit(i,KILL);
1451                     vec_clearbit(i,GEN);
1452                 }
1453             }
1454         }
1455     }
1456 
1457     vec_setbit(ni,GEN);                 // set bit in GEN for this def
1458 }
1459 }
1460 
1461 /***************************
1462  * Mark all elems as not being loop invariant.
1463  */
1464 
1465 private void unmarkall(elem *e)
1466 {
1467     for (; 1; e = e.EV.E1)
1468     {
1469         assert(e);
1470         e.Nflags &= ~NFLli;            /* unmark this elem             */
1471         if (OTunary(e.Eoper))
1472             continue;
1473         else if (OTbinary(e.Eoper))
1474         {
1475             unmarkall(e.EV.E2);
1476             continue;
1477         }
1478         return;
1479     }
1480 }
1481 
1482 
1483 /********************************
1484  * Search for references to v in tree n before nstop is encountered.
1485  * Params:
1486  *      v = symbol to search for
1487  *      n = tree to search
1488  *      nstop = stop searching tree when reaching this elem
1489  * Returns:
1490  *    true if there are any refs of v in n before nstop is encountered
1491  */
1492 
1493 private bool refs(Symbol *v,elem *n,elem *nstop)
1494 {
1495     symbol_debug(v);
1496     assert(symbol_isintab(v));
1497     assert(v.Ssymnum < globsym.top);
1498     bool stop = false;
1499 
1500     // Walk tree in evaluation order
1501     bool walk(elem* n)
1502     {
1503         elem_debug(n);
1504         assert(n);
1505 
1506         if (stop)
1507             return false;
1508         bool f = false;
1509         const op = n.Eoper;
1510         if (OTunary(op))
1511             f = walk(n.EV.E1);
1512         else if (OTbinary(op))
1513         {
1514             if (ERTOL(n))                   /* watch order of evaluation    */
1515             {
1516                 /* Note that (OPvar = e) is not a ref of OPvar, whereas     */
1517                 /* ((OPbit OPvar) = e) is a ref of OPvar, and (OPvar op= e) is */
1518                 /* a ref of OPvar, etc.                                     */
1519                 f = walk(n.EV.E2);
1520                 if (!f)
1521                 {
1522                     if (op == OPeq)
1523                     {
1524                         if (n.EV.E1.Eoper != OPvar)
1525                             f = walk(n.EV.E1.EV.E1);
1526                     }
1527                     else
1528                         f = walk(n.EV.E1);
1529                 }
1530             }
1531             else
1532                 f = walk(n.EV.E1) || walk(n.EV.E2);
1533         }
1534 
1535         if (n == nstop)
1536             stop = true;
1537         else if (n.Eoper == OPvar)           /* if variable reference        */
1538             return v == n.EV.Vsym;
1539         else if (op == OPasm)                /* everything is referenced     */
1540             return true;
1541         return f;
1542     }
1543 
1544     return walk(n);
1545 }
1546 
1547 /*************************
1548  * Move LIs to preheader.
1549  * Conditions to be satisfied for code motion are:
1550  *      1) All exit blocks are dominated (true before this is called).
1551  *                      -- OR --
1552  *      2) Variable assigned by a statement is not live on entering
1553  *         any successor outside the loop of any exit block of the
1554  *         loop.
1555  *
1556  *      3) Cannot move assignment to variable if there are any other
1557  *         assignments to that variable within the loop (true or
1558  *         assignment would not have been marked LI).
1559  *      4) Cannot move assignments to a variable if there is a use
1560  *         of that variable in this loop that is reached by any other
1561  *         def of it.
1562  *      5) Cannot move expressions that have side effects.
1563  *      6) Do not move assignments to variables that could be affected
1564  *         by ambiguous defs.
1565  *      7) It is not worth it to move expressions of the form:
1566  *              (var == const)
1567  * Input:
1568  *      n       the elem we're considering moving
1569  *      b       the block this elem is in
1570  *      l       the loop we're in
1571  *      domexit flags
1572  *      bit 0:  1       this branch is always executed
1573  *              0       this branch is only sometimes executed
1574  *      bit 1:  1       do not move LIs that could throw exceptions
1575  *                      or cannot be moved past possibly thrown exceptions
1576  * Returns:
1577  *      revised domexit
1578  */
1579 
1580 private void movelis(elem *n,block *b,loop *l,int *pdomexit)
1581 {
1582     vec_t tmp;
1583     elem *ne;
1584     elem *t;
1585     elem *n2;
1586     Symbol *v;
1587     tym_t ty;
1588 
1589 Lnextlis:
1590     //if (isLI(n)) { printf("movelis("); WReqn(n); printf(")\n"); }
1591     assert(l && n);
1592     elem_debug(n);
1593     const op = n.Eoper;
1594     switch (op)
1595     {
1596         case OPvar:
1597         case OPconst:
1598         case OPrelconst:
1599             goto Lret;
1600 
1601         case OPandand:
1602         case OPoror:
1603         case OPcond:
1604         {
1605             int domexit;
1606 
1607             movelis(n.EV.E1,b,l,pdomexit);        // always executed
1608             domexit = *pdomexit & ~1;   // sometimes executed
1609             movelis(n.EV.E2,b,l,&domexit);
1610             *pdomexit |= domexit & 2;
1611             goto Lret;
1612         }
1613 
1614         case OPeq:
1615             // Do loop invariant assignments
1616             if (isLI(n) && n.EV.E1.Eoper == OPvar)
1617             {
1618                 v = n.EV.E1.EV.Vsym;          // variable index number
1619 
1620                 if (!(v.Sflags & SFLunambig)) goto L3;         // case 6
1621 
1622                 // If case 4 is not satisfied, return
1623 
1624                 // Function parameters have an implied definition prior to the
1625                 // first block of the function. Unfortunately, the rd vector
1626                 // does not take this into account. Therefore, we assume the
1627                 // worst and reject assignments to function parameters.
1628                 if (v.Sclass == SCparameter || v.Sclass == SCregpar ||
1629                     v.Sclass == SCfastpar || v.Sclass == SCshadowreg)
1630                         goto L3;
1631 
1632                 if (el_sideeffect(n.EV.E2)) goto L3;              // case 5
1633 
1634                 // If case 1 or case 2 is not satisfied, return
1635 
1636                 if (!(*pdomexit & 1))                   // if not case 1
1637                 {
1638                     uint i;
1639                     for (i = 0; (i = cast(uint) vec_index(i, l.Lexit)) < dfo.length; ++i)  // for each exit block
1640                     {
1641                         foreach (bl; ListRange(dfo[i].Bsucc))
1642                         {
1643                             block *s;           // successor to exit block
1644 
1645                             s = list_block(bl);
1646                             if (!vec_testbit(s.Bdfoidx,l.Lloop) &&
1647                                 (!symbol_isintab(v) ||
1648                                  vec_testbit(v.Ssymnum,s.Binlv))) // if v is live on exit
1649                                     goto L3;
1650                         }
1651                     }
1652                 }
1653 
1654                 tmp = vec_calloc(go.defnod.length);
1655                 uint i;
1656                 for (i = 0; (i = cast(uint) vec_index(i, l.Lloop)) < dfo.length; ++i)  // for each block in loop
1657                 {
1658                     if (dfo[i] == b)        // except this one
1659                         continue;
1660 
1661                     //<if there are any RDs of v in Binrd other than n>
1662                     //    <if there are any refs of v in that block>
1663                     //        return;
1664 
1665                     //filterrd(tmp,dfo[i].Binrd,v);
1666                     listrds(dfo[i].Binrd,n.EV.E1,tmp);
1667                     uint j;
1668                     for (j = 0; (j = cast(uint) vec_index(j, tmp)) < go.defnod.length; ++j)  // for each RD of v in Binrd
1669                     {
1670                         if (go.defnod[j].DNelem == n)
1671                             continue;
1672                         if (dfo[i].Belem &&
1673                             refs(v,dfo[i].Belem,cast(elem *)null)) //if refs of v
1674                         {
1675                             vec_free(tmp);
1676                             goto L3;
1677                         }
1678                         break;
1679                     }
1680                 } // foreach
1681 
1682                 // <if there are any RDs of v in b.Binrd other than n>
1683                 //     <if there are any references to v before the
1684                 //      assignment to v>
1685                 //         <can't move this assignment>
1686 
1687                 //filterrd(tmp,b.Binrd,v);
1688                 listrds(b.Binrd,n.EV.E1,tmp);
1689                 uint j;
1690                 for (j = 0; (j = cast(uint) vec_index(j, tmp)) < go.defnod.length; ++j)  // for each RD of v in Binrd
1691                 {
1692                     if (go.defnod[j].DNelem == n)
1693                         continue;
1694                     if (b.Belem && refs(v,b.Belem,n))
1695                     {
1696                         vec_free(tmp);
1697                         goto L3;            // can't move it
1698                     }
1699                     break;                  // avoid redundant looping
1700                 }
1701                 vec_free(tmp);
1702 
1703                 // We have an LI assignment, n.
1704                 // Check to see if the rvalue is already in the preheader.
1705                 foreach (nl; ListRange(l.Llis))
1706                 {
1707                     if (el_match(n.EV.E2,list_elem(nl).EV.E2))
1708                     {
1709                         el_free(n.EV.E2);
1710                         n.EV.E2 = el_calloc();
1711                         el_copy(n.EV.E2,list_elem(nl).EV.E1);
1712                         if (debugc) printf("LI assignment rvalue was replaced\n");
1713                         doflow = true;
1714                         go.changes++;
1715                         break;
1716                     }
1717                 }
1718 
1719                 // move assignment elem to preheader
1720                 if (debugc) printf("Moved LI assignment ");
1721 
1722                 debug
1723                     if (debugc)
1724                     {
1725                         WReqn(n);
1726                         printf(";\n");
1727                     }
1728 
1729                 go.changes++;
1730                 doflow = true;                  // redo flow analysis
1731                 ne = el_calloc();
1732                 el_copy(ne,n);                  // create assignment elem
1733                 assert(l.Lpreheader);          // make sure there is one
1734                 appendelem(ne,&(l.Lpreheader.Belem)); // append ne to preheader
1735                 list_prepend(&l.Llis,ne);
1736 
1737                 el_copy(n,ne.EV.E1);      // replace n with just a reference to v
1738                 goto Lret;
1739             } // if
1740             break;
1741 
1742         case OPcall:
1743         case OPucall:
1744             *pdomexit |= 2;
1745             break;
1746 
1747         case OPpair:
1748         case OPrpair:                   // don't move these, as they do not do computation
1749             movelis(n.EV.E1,b,l,pdomexit);
1750             n = n.EV.E2;
1751             goto Lnextlis;
1752 
1753         default:
1754             break;
1755     }
1756 
1757 L3:
1758     // Do leaves of non-LI expressions, leaves of = elems that didn't
1759     // meet the invariant assignment removal criteria, and don't do leaves
1760     if (OTleaf(op))
1761         goto Lret;
1762     if (!isLI(n) || op == OPeq || op == OPcomma || OTrel(op) || op == OPnot ||
1763       // These are usually addressing modes, so moving them is a net loss
1764       (I32 && op == OPshl && n.EV.E2.Eoper == OPconst && el_tolong(n.EV.E2) <= 3UL)
1765      )
1766     {
1767         if (OTassign(op))
1768         {
1769             elem *n1 = n.EV.E1;
1770             elem *n11;
1771 
1772             if (OTbinary(op))
1773                 movelis(n.EV.E2,b,l,pdomexit);
1774 
1775             // Do lvalue only if it is an expression
1776             if (n1.Eoper == OPvar)
1777                 goto Lret;
1778             n11 = n1.EV.E1;
1779             if (OTbinary(n1.Eoper))
1780             {
1781                 movelis(n11,b,l,pdomexit);
1782                 n = n1.EV.E2;
1783             }
1784             // If *(x + c), just make x the LI, not the (x + c).
1785             // The +c comes free with the addressing mode.
1786             else if (n1.Eoper == OPind &&
1787                     isLI(n11) &&
1788                     n11.Eoper == OPadd &&
1789                     n11.EV.E2.Eoper == OPconst
1790                     )
1791             {
1792                 n = n11.EV.E1;
1793             }
1794             else
1795                 n = n11;
1796             movelis(n,b,l,pdomexit);
1797             if (b.Btry || !(n1.Eoper == OPvar && symbol_isintab(n1.EV.Vsym)))
1798             {
1799                 //printf("assign to global => domexit |= 2\n");
1800                 *pdomexit |= 2;
1801             }
1802         }
1803         else if (OTunary(op))
1804         {
1805             elem *e1 = n.EV.E1;
1806 
1807             // If *(x + c), just make x the LI, not the (x + c).
1808             // The +c comes free with the addressing mode.
1809             if (op == OPind &&
1810                 isLI(e1) &&
1811                 e1.Eoper == OPadd &&
1812                 e1.EV.E2.Eoper == OPconst
1813                )
1814             {
1815                 n = e1.EV.E1;
1816             }
1817             else
1818                 n = e1;
1819         }
1820         else if (OTbinary(op))
1821         {
1822             movelis(n.EV.E1,b,l,pdomexit);
1823             n = n.EV.E2;
1824         }
1825         goto Lnextlis;
1826   }
1827 
1828   if (el_sideeffect(n))
1829         goto Lret;
1830 
1831     static if (0)
1832     {
1833         printf("*pdomexit = %d\n",*pdomexit);
1834         if (*pdomexit & 2)
1835         {
1836             // If any indirections, can't LI it
1837 
1838             // If this operand has already been indirected, we can let
1839             // it pass.
1840             Symbol *s;
1841 
1842             printf("looking at:\n");
1843             elem_print(n);
1844             s = el_basesym(n.EV.E1);
1845             if (s)
1846             {
1847                 foreach (nl; ListRange(l.Llis))
1848                 {
1849                     elem *el = list_elem(nl);
1850                     el = el.EV.E2;
1851                     elem_print(el);
1852                     if (el.Eoper == OPind && el_basesym(el.EV.E1) == s)
1853                     {
1854                         printf("  pass!\n");
1855                         goto Lpass;
1856                     }
1857                 }
1858             }
1859             printf("  skip!\n");
1860             goto Lret;
1861 
1862         Lpass:
1863         }
1864     }
1865 
1866     // Move the LI expression to the preheader
1867     if (debugc) printf("Moved LI expression ");
1868 
1869     debug
1870         if (debugc)
1871         {
1872             WReqn(n);
1873             printf(";\n");
1874         }
1875 
1876     // See if it's already been moved
1877     ty = n.Ety;
1878     foreach (nl; ListRange(l.Llis))
1879     {
1880         elem *el;
1881         tym_t ty2;
1882 
1883         el = list_elem(nl);
1884         //printf("existing LI: "); WReqn(el); printf("\n");
1885         ty2 = el.EV.E2.Ety;
1886         if (tysize(ty) == tysize(ty2))
1887         {
1888             el.EV.E2.Ety = ty;
1889             if (el_match(n,el.EV.E2))
1890             {
1891                 el.EV.E2.Ety = ty2;
1892                 if (!OTleaf(n.Eoper))
1893                 {       el_free(n.EV.E1);
1894                         if (OTbinary(n.Eoper))
1895                                 el_free(n.EV.E2);
1896                 }
1897                 el_copy(n,el.EV.E1);      // make copy of temp
1898                 n.Ety = ty;
1899 
1900                 debug
1901                     if (debugc)
1902                     {   printf("Already moved: LI expression replaced with ");
1903                         WReqn(n);
1904                         printf("\nPreheader %d expression %p ",
1905                         l.Lpreheader.Bdfoidx,l.Lpreheader.Belem);
1906                         WReqn(l.Lpreheader.Belem);
1907                         printf("\n");
1908                     }
1909 
1910                 go.changes++;
1911                 doflow = true;                  // redo flow analysis
1912                 goto Lret;
1913             }
1914             el.EV.E2.Ety = ty2;
1915         }
1916     }
1917 
1918     if (!(*pdomexit & 1))                       // if only sometimes executed
1919     {
1920         if (debugc) printf(" doesn't dominate exit\n");
1921         goto Lret;                              // don't move LI
1922     }
1923 
1924     if (tyaggregate(n.Ety))
1925         goto Lret;
1926 
1927     go.changes++;
1928     doflow = true;                              // redo flow analysis
1929 
1930     t = el_alloctmp(n.Ety);                     /* allocate temporary t */
1931 
1932     debug
1933     {
1934         if (debugc) printf("movelis() introduced new variable '%s' of type ",t.EV.Vsym.Sident.ptr);
1935         if (debugc) WRTYxx(t.Ety);
1936         if (debugc) printf("\n");
1937     }
1938 
1939     n2 = el_calloc();
1940     el_copy(n2,n);                              /* create copy n2 of n  */
1941     ne = el_bin(OPeq,t.Ety,t,n2);               /* create elem t=n2     */
1942     assert(l.Lpreheader);                       /* make sure there is one */
1943     appendelem(ne,&(l.Lpreheader.Belem));       /* append ne to preheader */
1944 
1945     debug
1946         if (debugc)
1947         {
1948             printf("Preheader %d expression %p\n\t",
1949             l.Lpreheader.Bdfoidx,l.Lpreheader.Belem);
1950             WReqn(l.Lpreheader.Belem);
1951             printf("\nLI expression replaced with "); WReqn(t);
1952             printf("\n");
1953         }
1954 
1955     el_copy(n,t);                                 /* replace this elem with t */
1956 
1957     // Remember LI expression in elem list
1958     list_prepend(&l.Llis,ne);
1959 
1960 Lret:
1961 
1962 }
1963 
1964 /***************************
1965  * Append elem to existing elem using an OPcomma elem.
1966  * Input:
1967  *      n       elem to append
1968  *      *pn     elem to append to
1969  */
1970 
1971 private void appendelem(elem *n,elem **pn)
1972 {
1973     assert(n && pn);
1974     if (*pn)                                    /* if this elem exists  */
1975     {
1976         while ((*pn).Eoper == OPcomma)          /* while we see OPcomma elems */
1977         {
1978             (*pn).Ety = n.Ety;
1979             pn = &((*pn).EV.E2);                /* cruise down right side */
1980         }
1981         *pn = el_bin(OPcomma,n.Ety,*pn,n);
1982     }
1983     else
1984         *pn = n;                                /* else create a elem   */
1985 }
1986 
1987 /************** LOOP INDUCTION VARIABLES **********************/
1988 
1989 /*********************
1990  * Free iv list.
1991  */
1992 
1993 private void freeivlist(Iv *biv)
1994 {
1995     Iv *bivnext;
1996 
1997     while (biv)
1998     {
1999         famlist *fln;
2000 
2001         for (famlist *fl = biv.IVfamily; fl; fl = fln)
2002         {
2003             el_free(fl.c1);
2004             el_free(fl.c2);
2005             fln = fl.FLnext;
2006 
2007             fl.FLnext = famlist.freelist;
2008             famlist.freelist = fl;
2009         }
2010         bivnext = biv.IVnext;
2011 
2012         biv.IVnext = Iv.freelist;
2013         Iv.freelist = biv;
2014 
2015         biv = bivnext;
2016     }
2017 }
2018 
2019 /****************************
2020  * Create a new famlist entry.
2021  */
2022 
2023 private famlist * newfamlist(tym_t ty)
2024 {
2025     famlist *fl;
2026     eve c = void;
2027 
2028     memset(&c,0,c.sizeof);
2029     fl = famlist.mycalloc();
2030     fl.FLty = ty;
2031     switch (tybasic(ty))
2032     {   case TYfloat:
2033             c.Vfloat = 1;
2034             break;
2035 
2036         case TYdouble:
2037         case TYdouble_alias:
2038             c.Vdouble = 1;
2039             break;
2040 
2041         case TYldouble:
2042             c.Vldouble = 1;
2043             break;
2044 
2045         case TYbool:
2046         case TYchar:
2047         case TYschar:
2048         case TYuchar:
2049             c.Vchar = 1;
2050             break;
2051 
2052         case TYshort:
2053         case TYushort:
2054         case TYchar16:
2055         case TYwchar_t:             // BUG: what about 4 byte wchar_t's?
2056             c.Vshort = 1;
2057             break;
2058 
2059         case TYsptr:
2060         case TYcptr:
2061         case TYfptr:
2062         case TYvptr:
2063         case TYnptr:
2064         case TYnullptr:
2065         case TYimmutPtr:
2066         case TYsharePtr:
2067         case TYrestrictPtr:
2068         case TYfgPtr:
2069             ty = TYint;
2070             if (I64)
2071                 ty = TYllong;
2072             goto case TYuint;
2073 
2074         case TYint:
2075         case TYuint:
2076             c.Vint = 1;
2077             break;
2078 
2079         case TYhptr:
2080             ty = TYlong;
2081             goto case TYlong;
2082 
2083         case TYlong:
2084         case TYulong:
2085         case TYdchar:
2086         default:
2087             c.Vlong = 1;
2088             break;
2089     }
2090     fl.c1 = el_const(ty,&c);               /* c1 = 1               */
2091     c.Vldouble = 0;
2092     if (typtr(ty))
2093     {
2094         ty = TYint;
2095         if (tybasic(ty) == TYhptr)
2096             ty = TYlong;
2097         if (I64)
2098             ty = TYllong;
2099     }
2100     fl.c2 = el_const(ty,&c);               /* c2 = 0               */
2101     return fl;
2102 }
2103 
2104 /***************************
2105  * Remove induction variables from loop l.
2106  * Loop invariant removal should have been done just previously.
2107  */
2108 
2109 private void loopiv(loop *l)
2110 {
2111     if (debugc) printf("loopiv(%p)\n",l);
2112     assert(l.Livlist == null && l.Lopeqlist == null);
2113     elimspec(l);
2114     if (doflow)
2115     {
2116         flowrd();               /* compute reaching defs                */
2117         flowlv();               /* compute live variables               */
2118         flowae();               // compute available expressions
2119         doflow = false;
2120     }
2121     findbasivs(l);              /* find basic induction variables       */
2122     findopeqs(l);               // find op= variables
2123     findivfams(l);              /* find IV families                     */
2124     elimfrivivs(l);             /* eliminate less useful family IVs     */
2125     intronvars(l);              /* introduce new variables              */
2126     elimbasivs(l);              /* eliminate basic IVs                  */
2127     if (!addblk)                // adding a block changes the Binlv
2128         elimopeqs(l);           // eliminate op= variables
2129 
2130     freeivlist(l.Livlist);      // free up IV list
2131     l.Livlist = null;
2132     freeivlist(l.Lopeqlist);    // free up list
2133     l.Lopeqlist = null;
2134 
2135     /* Do copy propagation and dead assignment elimination        */
2136     /* upon return to optfunc()                                   */
2137 }
2138 
2139 /*************************************
2140  * Find basic IVs of loop l.
2141  * A basic IV x of loop l is a variable x which has
2142  * exactly one assignment within l of the form:
2143  * x += c or x -= c, where c is either a constant
2144  * or a LI.
2145  * Input:
2146  *      go.defnod[] loaded with all the definition elems of the loop
2147  */
2148 
2149 private void findbasivs(loop *l)
2150 {
2151     vec_t poss,notposs;
2152     elem *n;
2153     bool ambdone;
2154 
2155     assert(l);
2156     ambdone = false;
2157     poss = vec_calloc(globsym.top);
2158     notposs = vec_calloc(globsym.top);  /* vector of all variables      */
2159                                         /* (initially all unmarked)     */
2160 
2161     /* for each def in go.defnod[] that is within loop l     */
2162 
2163     foreach (const i; 0 .. go.defnod.length)
2164     {
2165         if (!vec_testbit(go.defnod[i].DNblock.Bdfoidx,l.Lloop))
2166             continue;               /* def is not in the loop       */
2167 
2168         n = go.defnod[i].DNelem;
2169         elem_debug(n);
2170         if (OTassign(n.Eoper) && n.EV.E1.Eoper == OPvar)
2171         {
2172             Symbol *s;                  /* if unambiguous def           */
2173 
2174             s = n.EV.E1.EV.Vsym;
2175             if (symbol_isintab(s))
2176             {
2177                 SYMIDX v;
2178 
2179                 v = n.EV.E1.EV.Vsym.Ssymnum;
2180                 if ((n.Eoper == OPaddass || n.Eoper == OPminass ||
2181                      n.Eoper == OPpostinc || n.Eoper == OPpostdec) &&
2182                         (n.EV.E2.Eoper == OPconst || /* if x += c or x -= c          */
2183                          n.EV.E2.Eoper == OPvar && isLI(n.EV.E2)))
2184                 {
2185                     if (vec_testbit(v,poss))
2186                         /* We've already seen this def elem,    */
2187                         /* therefore there is more than one     */
2188                         /* def of v within the loop, therefore  */
2189                         /* v is not a basic IV.                 */
2190                         vec_setbit(v,notposs);
2191                     else
2192                         vec_setbit(v,poss);
2193                 }
2194                 else                    /* else mark as not possible    */
2195                     vec_setbit(v,notposs);
2196             }
2197         }
2198         else                            /* else ambiguous def           */
2199         {
2200             /* mark any vars that could be affected by              */
2201             /* this def as not possible                             */
2202 
2203             if (!ambdone)           /* avoid redundant loops        */
2204             {
2205                 foreach (uint j; 0 .. globsym.top)
2206                 {
2207                     if (!(globsym.tab[j].Sflags & SFLunambig))
2208                         vec_setbit(j,notposs);
2209                 }
2210                 ambdone = true;
2211             }
2212         }
2213     }
2214     static if (0)
2215     {
2216         printf("poss    "); vec_println(poss);
2217         printf("notposs "); vec_println(notposs);
2218     }
2219     vec_subass(poss,notposs);             /* poss = poss - notposs        */
2220 
2221     /* create list of IVs */
2222     uint i;
2223     for (i = 0; (i = cast(uint) vec_index(i, poss)) < globsym.top; ++i)  // for each basic IV
2224     {
2225         Iv *biv;
2226         Symbol *s;
2227 
2228         /* Skip if we don't want it to be a basic IV (see funcprev())   */
2229         s = globsym.tab[i];
2230         assert(symbol_isintab(s));
2231         if (s.Sflags & SFLnotbasiciv)
2232             continue;
2233 
2234         // Do not use aggregates as basic IVs. This is because the other loop
2235         // code doesn't check offsets into symbols, (assuming everything
2236         // is at offset 0). We could perhaps amend this by allowing basic IVs
2237         // if the struct consists of only one data member.
2238         if (tyaggregate(s.ty()))
2239             continue;
2240 
2241         // Do not use complex types as basic IVs, as the code gen isn't up to it
2242         if (tycomplex(s.ty()))
2243                 continue;
2244 
2245         biv = Iv.mycalloc();
2246         biv.IVnext = l.Livlist;
2247         l.Livlist = biv;               // link into list of IVs
2248 
2249         biv.IVbasic = s;               // symbol of basic IV
2250 
2251         if (debugc) printf("Symbol '%s' (%d) is a basic IV, ", s.Sident.ptr, i);
2252 
2253         /* We have the sym idx of the basic IV. We need to find         */
2254         /* the parent of the increment elem for it.                     */
2255 
2256         /* First find the go.defnod[]      */
2257         foreach (j; 0 .. go.defnod.length)
2258         {
2259             /* If go.defnod is a def of i and it is in the loop        */
2260             if (go.defnod[j].DNelem.EV.E1 &&     /* OPasm are def nodes  */
2261                 go.defnod[j].DNelem.EV.E1.EV.Vsym == s &&
2262                 vec_testbit(go.defnod[j].DNblock.Bdfoidx,l.Lloop))
2263             {
2264                 biv.IVincr = el_parent(go.defnod[j].DNelem,&(go.defnod[j].DNblock.Belem));
2265                 assert(s == (*biv.IVincr).EV.E1.EV.Vsym);
2266 
2267                 debug if (debugc)
2268                 {
2269                     printf("Increment elem is: "); WReqn(*biv.IVincr);     printf("\n");
2270                 }
2271                 goto L1;
2272             }
2273         }
2274         assert(0);                      /* should have found it         */
2275         /* NOTREACHED */
2276 
2277     L1:
2278     }
2279 
2280     vec_free(poss);
2281     vec_free(notposs);
2282 }
2283 
2284 /*************************************
2285  * Find op= elems of loop l.
2286  * Analogous to findbasivs().
2287  * Used to eliminate useless loop code normally found in benchmark programs.
2288  * Input:
2289  *      go.defnod[] loaded with all the definition elems of the loop
2290  */
2291 
2292 private void findopeqs(loop *l)
2293 {
2294     vec_t poss,notposs;
2295     elem *n;
2296     bool ambdone;
2297 
2298     assert(l);
2299     ambdone = false;
2300     poss = vec_calloc(globsym.top);
2301     notposs = vec_calloc(globsym.top);  // vector of all variables
2302                                         // (initially all unmarked)
2303 
2304     // for each def in go.defnod[] that is within loop l
2305 
2306     foreach (i; 0 .. go.defnod.length)
2307     {
2308         if (!vec_testbit(go.defnod[i].DNblock.Bdfoidx,l.Lloop))
2309             continue;               // def is not in the loop
2310 
2311         n = go.defnod[i].DNelem;
2312         elem_debug(n);
2313         if (OTopeq(n.Eoper) && n.EV.E1.Eoper == OPvar)
2314         {
2315             Symbol *s;                  // if unambiguous def
2316 
2317             s = n.EV.E1.EV.Vsym;
2318             if (symbol_isintab(s))
2319             {
2320                 SYMIDX v;
2321 
2322                 v = n.EV.E1.EV.Vsym.Ssymnum;
2323                 {
2324                     if (vec_testbit(v,poss))
2325                         // We've already seen this def elem,
2326                         // therefore there is more than one
2327                         // def of v within the loop, therefore
2328                         // v is not a basic IV.
2329                         vec_setbit(v,notposs);
2330                     else
2331                         vec_setbit(v,poss);
2332                 }
2333             }
2334         }
2335         else                            // else ambiguous def
2336         {
2337             // mark any vars that could be affected by
2338             // this def as not possible
2339 
2340             if (!ambdone)           // avoid redundant loops
2341             {
2342                 foreach (j; 0 .. globsym.top)
2343                 {
2344                     if (!(globsym.tab[j].Sflags & SFLunambig))
2345                         vec_setbit(j,notposs);
2346                 }
2347                 ambdone = true;
2348             }
2349         }
2350     }
2351 
2352     // Don't use symbols already in Livlist
2353     for (Iv *iv = l.Livlist; iv; iv = iv.IVnext)
2354     {
2355         Symbol *s;
2356 
2357         s = iv.IVbasic;
2358         vec_setbit(s.Ssymnum,notposs);
2359     }
2360 
2361 
2362     static if (0)
2363     {
2364         printf("poss    "); vec_println(poss);
2365         printf("notposs "); vec_println(notposs);
2366     }
2367 
2368     vec_subass(poss,notposs);           // poss = poss - notposs
2369 
2370     // create list of IVs
2371     uint i;
2372     for (i = 0; (i = cast(uint) vec_index(i, poss)) < globsym.top; ++i)  // for each opeq IV
2373     {
2374         Iv *biv;
2375         Symbol *s;
2376 
2377         s = globsym.tab[i];
2378         assert(symbol_isintab(s));
2379 
2380         // Do not use aggregates as basic IVs. This is because the other loop
2381         // code doesn't check offsets into symbols, (assuming everything
2382         // is at offset 0). We could perhaps amend this by allowing basic IVs
2383         // if the struct consists of only one data member.
2384         if (tyaggregate(s.ty()))
2385             continue;
2386 
2387         biv = Iv.mycalloc();
2388         biv.IVnext = l.Lopeqlist;
2389         l.Lopeqlist = biv;             // link into list of IVs
2390 
2391         biv.IVbasic = s;               // symbol of basic IV
2392 
2393         if (debugc) printf("Symbol '%s' (%d) is an opeq IV, ",s.Sident.ptr,i);
2394 
2395         // We have the sym idx of the basic IV. We need to find
2396         // the parent of the increment elem for it.
2397 
2398         // First find the go.defnod[]
2399         foreach (j; 0 .. go.defnod.length)
2400         {
2401             // If go.defnod is a def of i and it is in the loop
2402             if (go.defnod[j].DNelem.EV.E1 &&     // OPasm are def nodes
2403                 go.defnod[j].DNelem.EV.E1.EV.Vsym == s &&
2404                 vec_testbit(go.defnod[j].DNblock.Bdfoidx,l.Lloop))
2405             {
2406                 biv.IVincr = el_parent(go.defnod[j].DNelem,&(go.defnod[j].DNblock.Belem));
2407                 assert(s == (*biv.IVincr).EV.E1.EV.Vsym);
2408 
2409                 debug if (debugc)
2410                 {
2411                     printf("Opeq elem is: "); WReqn(*biv.IVincr);  printf("\n");
2412                 }
2413                 goto Lcont;
2414             }
2415         }
2416         assert(0);                      // should have found it
2417         // NOTREACHED
2418 
2419     Lcont:
2420     }
2421 
2422     vec_free(poss);
2423     vec_free(notposs);
2424 }
2425 
2426 /*****************************
2427  * Find families for each basic IV.
2428  * An IV family is a list of elems of the form
2429  * c1*X+c2, where X is a basic induction variable.
2430  * Note that we do not do divides, because of roundoff error problems.
2431  */
2432 
2433 private void findivfams(loop *l)
2434 {
2435     if (debugc) printf("findivfams(%p)\n",l);
2436     for (Iv *biv = l.Livlist; biv; biv = biv.IVnext)
2437     {
2438         for (uint i = 0; (i = cast(uint) vec_index(i, l.Lloop)) < dfo.length; ++i)  // for each block in loop
2439             if (dfo[i].Belem)
2440                 ivfamelems(biv,&(dfo[i].Belem));
2441         /* Fold all the constant expressions in c1 and c2.      */
2442         for (famlist *fl = biv.IVfamily; fl; fl = fl.FLnext)
2443         {
2444             fl.c1 = doptelem(fl.c1,GOALvalue | GOALagain);
2445             fl.c2 = doptelem(fl.c2,GOALvalue | GOALagain);
2446         }
2447     }
2448 }
2449 
2450 /*************************
2451  * Tree walking support routine for findivfams().
2452  *      biv =   basic induction variable pointer
2453  *      pn      pointer to elem
2454  */
2455 
2456 private void ivfamelems(Iv *biv,elem **pn)
2457 {
2458     tym_t ty,c2ty;
2459     elem *n;
2460     elem *n1;
2461     elem *n2;
2462 
2463     assert(pn);
2464     n = *pn;
2465     assert(biv && n);
2466     const op = n.Eoper;
2467     if (OTunary(op))
2468     {
2469        ivfamelems(biv,&n.EV.E1);
2470         n1 = n.EV.E1;
2471         n2 = null;
2472     }
2473     else if (OTbinary(op))
2474     {
2475         ivfamelems(biv,&n.EV.E1);
2476         ivfamelems(biv,&n.EV.E2); /* LTOR or RTOL order is unimportant */
2477         n1 = n.EV.E1;
2478         n2 = n.EV.E2;
2479     }
2480     else                                /* else leaf elem               */
2481         return;                         /* which can't be in the family */
2482 
2483     if (tycomplex(n.Ety))
2484         return;
2485 
2486     if (op == OPmul || op == OPadd || op == OPmin ||
2487         op == OPneg || op == OPshl)
2488     {   /* Note that we are wimping out and not considering             */
2489         /* LI variables as part of c1 and c2, but only constants.       */
2490 
2491         ty = n.Ety;
2492 
2493         /* Since trees are canonicalized, basic induction variables     */
2494         /* will only appear on the left.                                */
2495 
2496         /* Improvement:                                                 */
2497         /* We wish to pick up the cases (biv + li), (biv - li) and      */
2498         /* (li + biv). OPmul and LS with bivs are out, since if we      */
2499         /* try to eliminate the biv, and the loop test is a >, >=,      */
2500         /* <, <=, we have a problem since we don't know if the li       */
2501         /* is negative. (Would have to call swaprel() on it.)           */
2502 
2503         /* If we have (li + var), swap the leaves.                      */
2504         if (op == OPadd && isLI(n1) && n1.Eoper == OPvar && n2.Eoper == OPvar)
2505         {
2506             n.EV.E1 = n2;
2507             n2 = n.EV.E2 = n1;
2508             n1 = n.EV.E1;
2509         }
2510 
2511         // Get rid of case where we painted a far pointer to a long
2512         if (op == OPadd || op == OPmin)
2513         {
2514             int sz;
2515 
2516             sz = tysize(ty);
2517             if (sz == tysize(TYfptr) && !tyfv(ty) &&
2518                 (sz != tysize(n1.Ety) || sz != tysize(n2.Ety)))
2519                 return;
2520         }
2521 
2522         /* Look for function of basic IV (-biv or biv op const)         */
2523         if (n1.Eoper == OPvar && n1.EV.Vsym == biv.IVbasic)
2524         {
2525             if (op == OPneg)
2526             {
2527                 famlist *fl;
2528 
2529                 if (debugc) printf("found (-biv), elem %p\n",n);
2530                 fl = newfamlist(ty);
2531                 fl.FLivty = n1.Ety;
2532                 fl.FLpelem = pn;
2533                 fl.FLnext = biv.IVfamily;
2534                 biv.IVfamily = fl;
2535                 fl.c1 = el_una(op,ty,fl.c1); /* c1 = -1       */
2536             }
2537             else if (n2.Eoper == OPconst ||
2538                      isLI(n2) && (op == OPadd || op == OPmin))
2539             {
2540                 famlist *fl;
2541 
2542                 debug
2543                     if (debugc)
2544                     {   printf("found (biv op const), elem (");
2545                             WReqn(n);
2546                             printf(");\n");
2547                             printf("Types: n1="); WRTYxx(n1.Ety);
2548                             printf(" ty="); WRTYxx(ty);
2549                             printf(" n2="); WRTYxx(n2.Ety);
2550                             printf("\n");
2551                     }
2552 
2553                 fl = newfamlist(ty);
2554                 fl.FLivty = n1.Ety;
2555                 fl.FLpelem = pn;
2556                 fl.FLnext = biv.IVfamily;
2557                 biv.IVfamily = fl;
2558                 switch (op)
2559                 {
2560                     case OPadd:           /* c2 = right           */
2561                         c2ty = n2.Ety;
2562                         if (typtr(fl.c2.Ety))
2563                                 c2ty = fl.c2.Ety;
2564                         goto L1;
2565 
2566                     case OPmin:           /* c2 = -right          */
2567                         c2ty = fl.c2.Ety;
2568                         /* Check for subtracting two pointers */
2569                         if (typtr(c2ty) && typtr(n2.Ety))
2570                         {
2571                             if (tybasic(c2ty) == TYhptr)
2572                                 c2ty = TYlong;
2573                             else
2574                                 c2ty = I64 ? TYllong : TYint;
2575                         }
2576                   L1:
2577                         fl.c2 = el_bin(op,c2ty,fl.c2,el_copytree(n2));
2578                         break;
2579 
2580                     case OPmul:           /* c1 = right           */
2581                     case OPshl:           /* c1 = 1 << right      */
2582                         fl.c1 = el_bin(op,ty,fl.c1,el_copytree(n2));
2583                         break;
2584 
2585                     default:
2586                         assert(0);
2587                 }
2588             }
2589         }
2590 
2591         /* Look for function of existing IV                             */
2592 
2593         for (famlist *f = biv.IVfamily; f; f = f.FLnext)
2594         {
2595             if (*f.FLpelem != n1)          /* not it               */
2596                 continue;
2597 
2598             /* Look for (f op constant)     */
2599             if (op == OPneg)
2600             {
2601                 if (debugc) printf("found (-f), elem %p\n",n);
2602                 /* c1 = -c1; c2 = -c2; */
2603                 f.c1 = el_una(OPneg,ty,f.c1);
2604                 f.c2 = el_una(OPneg,ty,f.c2);
2605                 f.FLty = ty;
2606                 f.FLpelem = pn;        /* replace with new IV  */
2607             }
2608             else if (n2.Eoper == OPconst ||
2609                      isLI(n2) && (op == OPadd || op == OPmin))
2610             {
2611                 debug
2612                     if (debugc)
2613                     {
2614                         printf("found (f op const), elem (");
2615                         WReqn(n);
2616                         assert(*pn == n);
2617                         printf(");\n");
2618                         elem_print(n);
2619                     }
2620 
2621                 switch (op)
2622                 {
2623                     case OPmul:
2624                     case OPshl:
2625                         f.c1 = el_bin(op,ty,f.c1,el_copytree(n2));
2626                         break;
2627 
2628                     case OPadd:
2629                     case OPmin:
2630                         break;
2631 
2632                     default:
2633                         assert(0);
2634                 }
2635                 f.c2 = el_bin(op,ty,f.c2,el_copytree(n2));
2636                 f.FLty = ty;
2637                 f.FLpelem = pn;        /* replace with new IV  */
2638             } /* else if */
2639         } /* for */
2640     } /* if */
2641 }
2642 
2643 /*********************************
2644  * Eliminate frivolous family ivs, that is,
2645  * if we can't eliminate the BIV, then eliminate family ivs that
2646  * differ from it only by a constant.
2647  */
2648 
2649 private void elimfrivivs(loop *l)
2650 {
2651     for (Iv *biv = l.Livlist; biv; biv = biv.IVnext)
2652     {
2653         int nfams;
2654         int nrefs;
2655 
2656         if (debugc) printf("elimfrivivs()\n");
2657         /* Compute number of family ivs for biv */
2658         nfams = 0;
2659         for (famlist *fl = biv.IVfamily; fl; fl = fl.FLnext)
2660                 nfams++;
2661         if (debugc) printf("nfams = %d\n",nfams);
2662 
2663         /* Compute number of references to biv  */
2664         if (onlyref(biv.IVbasic,l,*biv.IVincr,&nrefs))
2665                 nrefs--;
2666         if (debugc) printf("nrefs = %d\n",nrefs);
2667         assert(nrefs + 1 >= nfams);
2668         if (nrefs > nfams ||            // if we won't eliminate the biv
2669             (!I16 && nrefs == nfams))
2670         {   /* Eliminate any family ivs that only differ by a constant  */
2671             /* from biv                                                 */
2672             for (famlist *fl = biv.IVfamily; fl; fl = fl.FLnext)
2673             {
2674                 elem *ec1 = fl.c1;
2675                 targ_llong c;
2676 
2677                 if (elemisone(ec1) ||
2678                     // Eliminate fl's that can be represented by
2679                     // an addressing mode
2680                     (!I16 && ec1.Eoper == OPconst && tyintegral(ec1.Ety) &&
2681                      ((c = el_tolong(ec1)) == 2 || c == 4 || c == 8)
2682                     )
2683                    )
2684                 {
2685                     fl.FLtemp = FLELIM;
2686 
2687                     debug
2688                         if (debugc)
2689                         {
2690                             printf("Eliminating frivolous IV ");
2691                             WReqn(*fl.FLpelem);
2692                             printf("\n");
2693                         }
2694                 }
2695             }
2696         }
2697     }
2698 }
2699 
2700 
2701 /******************************
2702  * Introduce new variables.
2703  */
2704 
2705 private void intronvars(loop *l)
2706 {
2707     elem *T;
2708     elem *ne;
2709     elem *t2;
2710     elem *C2;
2711     elem *cmul;
2712     tym_t ty,tyr;
2713 
2714     if (debugc) printf("intronvars(%p)\n",l);
2715     for (Iv *biv = l.Livlist; biv; biv = biv.IVnext)      // for each basic IV
2716     {
2717         elem *bivinc = *biv.IVincr;   /* ptr to increment elem */
2718 
2719         for (famlist *fl = biv.IVfamily; fl; fl = fl.FLnext)
2720         {                               /* for each IV in family of biv  */
2721             if (fl.FLtemp == FLELIM)   /* if already eliminated         */
2722                 continue;
2723 
2724             /* If induction variable can be written as a simple function */
2725             /* of a previous induction variable, skip it.                */
2726             if (funcprev(biv,fl))
2727                 continue;
2728 
2729             ty = fl.FLty;
2730             T = el_alloctmp(ty);        /* allocate temporary T          */
2731             fl.FLtemp = T.EV.Vsym;
2732 
2733             debug
2734             {
2735                 if (debugc) printf("intronvars() introduced new variable '%s' of type ",T.EV.Vsym.Sident.ptr);
2736                 if (debugc) WRTYxx(ty);
2737                 if (debugc) printf("\n");
2738             }
2739 
2740             /* append elem T=biv*C1+C2 to preheader */
2741             /* ne = biv*C1      */
2742             tyr = fl.FLivty;                   /* type of biv              */
2743             ne = el_var(biv.IVbasic);
2744             ne.Ety = tyr;
2745             if (!elemisone(fl.c1))             /* don't multiply ptrs by 1 */
2746                 ne = el_bin(OPmul,tyr,ne,el_copytree(fl.c1));
2747             if (tyfv(tyr) && tysize(ty) == SHORTSIZE)
2748                 ne = el_una(OP32_16,ty,ne);
2749             C2 = el_copytree(fl.c2);
2750             t2 = el_bin(OPadd,ty,ne,C2);        /* t2 = ne + C2         */
2751             ne = el_bin(OPeq,ty,el_copytree(T),t2);
2752             appendelem(ne, &(l.Lpreheader.Belem));
2753 
2754             /* prefix T+=C1*C to elem biv+=C                            */
2755             /* Must prefix in case the value of the expression (biv+=C) */
2756             /* is used by somebody up the tree.                         */
2757             cmul = el_bin(OPmul,fl.c1.Ety,el_copytree(fl.c1),
2758                                           el_copytree(bivinc.EV.E2));
2759             t2 = el_bin(bivinc.Eoper,ty,el_copytree(T),cmul);
2760             t2 = doptelem(t2,GOALvalue | GOALagain);
2761             *biv.IVincr = el_bin(OPcomma,bivinc.Ety,t2,bivinc);
2762             biv.IVincr = &((*biv.IVincr).EV.E2);
2763 
2764             debug
2765                 if (debugc)
2766                 {
2767                     printf("Replacing elem (");
2768                     WReqn(*fl.FLpelem);
2769                     printf(") with '%s'\n",T.EV.Vsym.Sident.ptr);
2770                     printf("The init elem is (");
2771                     WReqn(ne);
2772                     printf(");\nThe increment elem is (");
2773                     WReqn(t2);
2774                     printf(")\n");
2775                 }
2776 
2777             el_free(*fl.FLpelem);
2778             *fl.FLpelem = T;           /* replace elem n with ref to T  */
2779             doflow = true;              /* redo flow analysis           */
2780             go.changes++;
2781         } /* for */
2782     } /* for */
2783 }
2784 
2785 /*******************************
2786  * Determine if induction variable can be rewritten as a simple
2787  * function of a previously generated temporary.
2788  * This can frequently
2789  * generate less code than that of an all-new temporary (especially
2790  * if it is the same as a previous temporary!).
2791  * Input:
2792  *      biv             Basic induction variable
2793  *      fl              Item in biv's family list we are looking at
2794  * Returns:
2795  *      false           Caller should create a new induction variable.
2796  *      true            *FLpelem is replaced with function of a previous
2797  *                      induction variable. FLtemp is set to FLELIM to
2798  *                      indicate this.
2799  */
2800 
2801 private bool funcprev(Iv *biv,famlist *fl)
2802 {
2803     tym_t tymin;
2804     int sz;
2805     elem *e1;
2806     elem *e2;
2807     elem *flse1;
2808 
2809     debug if (debugc)
2810         printf("funcprev\n");
2811 
2812     for (famlist *fls = biv.IVfamily; fls != fl; fls = fls.FLnext)
2813     {
2814         assert(fls);                   /* fl must be in list           */
2815         if (fls.FLtemp == FLELIM)      /* no iv we can use here        */
2816             continue;
2817 
2818         /* The multipliers must match   */
2819         if (!el_match(fls.c1,fl.c1))
2820             continue;
2821 
2822         /* If the c2's match also, we got it easy */
2823         if (el_match(fls.c2,fl.c2))
2824         {
2825             if (tysize(fl.FLty) > tysize(fls.FLtemp.ty()))
2826                 continue;              /* can't increase size of var   */
2827             flse1 = el_var(fls.FLtemp);
2828             flse1.Ety = fl.FLty;
2829             goto L2;
2830         }
2831 
2832         /* The difference is only in the addition. Therefore, replace
2833            *fl.FLpelem with:
2834                 case 1:         (fl.c2 + (fls.FLtemp - fls.c2))
2835                 case 2:         (fls.FLtemp + (fl.c2 - fls.c2))
2836          */
2837         e1 = fl.c2;
2838         /* Subtracting relocatables usually generates slow code for     */
2839         /* linkers that can't handle arithmetic on relocatables.        */
2840         if (typtr(fls.c2.Ety))
2841         {
2842             if (fls.c2.Eoper == OPrelconst &&
2843                 !(fl.c2.Eoper == OPrelconst &&
2844                   fl.c2.EV.Vsym == fls.c2.EV.Vsym)
2845                )
2846                 continue;
2847         }
2848         flse1 = el_var(fls.FLtemp);
2849         e2 = flse1;                             /* assume case 1        */
2850         tymin = e2.Ety;
2851         if (typtr(fls.c2.Ety))
2852         {
2853             if (!typtr(tymin))
2854             {
2855                 if (typtr(e1.Ety))
2856                 {
2857                     e1 = e2;
2858                     e2 = fl.c2;            /* case 2               */
2859                 }
2860                 else                        /* can't subtract fptr  */
2861                     goto L1;
2862             }
2863             if (tybasic(fls.c2.Ety) == TYhptr)
2864                 tymin = TYlong;
2865             else
2866                 tymin = I64 ? TYllong : TYint;         /* type of (ptr - ptr) */
2867         }
2868 
2869         /* If e1 and fls.c2 are fptrs, and are not from the same       */
2870         /* segment, we cannot subtract them.                            */
2871         if (tyfv(e1.Ety) && tyfv(fls.c2.Ety))
2872         {
2873             if (e1.Eoper != OPrelconst || fls.c2.Eoper != OPrelconst)
2874                 goto L1;                /* assume expressions have diff segs */
2875             if (e1.EV.Vsym.Sclass != fls.c2.EV.Vsym.Sclass)
2876             {
2877                L1:
2878                 el_free(flse1);
2879                 continue;
2880             }
2881         }
2882 
2883         /* Some more type checking...   */
2884         sz = tysize(fl.FLty);
2885         if (sz != tysize(e1.Ety) &&
2886             sz != tysize(tymin))
2887             goto L1;
2888 
2889         /* Do some type checking (can't add pointers and get a pointer!) */
2890         //if (typtr(fl.FLty) && typtr(e1.Ety) && typtr(tymin))
2891             //goto L1;
2892         /* Construct (e1 + (e2 - fls.c2))      */
2893         flse1 = el_bin(OPadd,fl.FLty,
2894                             e1,
2895                             el_bin(OPmin,tymin,
2896                                     e2,
2897                                     el_copytree(fls.c2)));
2898         if (sz < tysize(tymin) && sz == tysize(e1.Ety))
2899         {
2900             assert(I16);
2901             flse1.EV.E2 = el_una(OPoffset,fl.FLty,flse1.EV.E2);
2902         }
2903 
2904         flse1 = doptelem(flse1,GOALvalue | GOALagain);
2905         fl.c2 = null;
2906     L2:
2907         debug if (debugc)
2908         {
2909             printf("Replacing ");
2910             WReqn(*fl.FLpelem);
2911             printf(" with ");
2912             WReqn(flse1);
2913             printf("\n");
2914         }
2915 
2916         el_free(*fl.FLpelem);
2917         *fl.FLpelem = flse1;
2918 
2919         /* Fix the iv so when we do loops again, we won't create        */
2920         /* yet another iv, which is just what funcprev() is supposed    */
2921         /* to prevent.                                                  */
2922         fls.FLtemp.Sflags |= SFLnotbasiciv;
2923 
2924         fl.FLtemp = FLELIM;            /* mark iv as being gone        */
2925         go.changes++;
2926         doflow = true;
2927         return true;                    /* it was replaced              */
2928     }
2929     return false;                       /* need to create a new variable */
2930 }
2931 
2932 /***********************
2933  * Eliminate basic IVs.
2934  */
2935 
2936 private void elimbasivs(loop *l)
2937 {
2938     if (debugc) printf("elimbasivs(%p)\n",l);
2939     for (Iv *biv = l.Livlist; biv; biv = biv.IVnext)        // for each basic IV
2940     {
2941         /* Can't eliminate this basic IV if we have a goal for the      */
2942         /* increment elem.                                              */
2943         // Be careful about Nflags being in a union...
2944         elem* einc = *biv.IVincr;
2945         if (!(einc.Nflags & NFLnogoal))
2946             continue;
2947 
2948         Symbol* X = biv.IVbasic;
2949         assert(symbol_isintab(X));
2950         tym_t ty = X.ty();
2951         int refcount;
2952         elem** pref = onlyref(X,l,einc,&refcount);
2953 
2954         /* if only ref of X is of the form (X) or (X relop e) or (e relop X) */
2955         if (pref != null && refcount <= 1)
2956         {
2957             famlist* fl = biv.IVfamily;
2958             if (!fl)                // if no elems in family of biv
2959                 continue;
2960 
2961             elem* ref_ = *pref;
2962 
2963             /* Replace (X) with (X != 0)                            */
2964             if (ref_.Eoper == OPvar)
2965                 ref_ = *pref = el_bin(OPne,TYint,ref_,el_long(ref_.Ety,0L));
2966 
2967             fl = simfl(fl,ty);      /* find simplest elem in family */
2968             if (!fl)
2969                 continue;
2970 
2971             // Don't do the replacement if we would replace a
2972             // signed comparison with an unsigned one
2973             tym_t flty = fl.FLty;
2974             if (tyuns(ref_.EV.E1.Ety) | tyuns(ref_.EV.E2.Ety))
2975                 flty = touns(flty);
2976 
2977             if (ref_.Eoper >= OPle && ref_.Eoper <= OPge &&
2978                 !(tyuns(ref_.EV.E1.Ety) | tyuns(ref_.EV.E2.Ety)) &&
2979                  tyuns(flty))
2980                     continue;
2981 
2982             /* if we have (e relop X), replace it with (X relop e)  */
2983             if (ref_.EV.E2.Eoper == OPvar && ref_.EV.E2.EV.Vsym == X)
2984             {
2985                 elem* tmp = ref_.EV.E2;
2986                 ref_.EV.E2 = ref_.EV.E1;
2987                 ref_.EV.E1 = tmp;
2988                 ref_.Eoper = cast(ubyte)swaprel(ref_.Eoper);
2989             }
2990 
2991             // If e*c1+c2 would result in a sign change or an overflow
2992             // then we can't do it
2993             if (fl.c1.Eoper == OPconst)
2994             {
2995                 targ_llong c1 = el_tolong(fl.c1);
2996                 const int sz = tysize(ty);
2997                 if (sz == SHORTSIZE &&
2998                     ((ref_.EV.E2.Eoper == OPconst &&
2999                     c1 * el_tolong(ref_.EV.E2) & ~0x7FFFL) ||
3000                      c1 & ~0x7FFFL)
3001                    )
3002                     continue;
3003 
3004                 if (sz == LONGSIZE &&
3005                     ((ref_.EV.E2.Eoper == OPconst &&
3006                     c1 * el_tolong(ref_.EV.E2) & ~0x7FFFFFFFL) ||
3007                      c1 & ~0x7FFFFFFFL)
3008                    )
3009                     continue;
3010                 if (sz == LLONGSIZE &&
3011                     ((ref_.EV.E2.Eoper == OPconst &&
3012                     c1 * el_tolong(ref_.EV.E2) & ~0x7FFFFFFFFFFFFFFFL) ||
3013                      c1 & ~0x7FFFFFFFFFFFFFFFL)
3014                    )
3015                     continue;
3016             }
3017 
3018             /* If the incr is a decrement, and the relational is < or <=,
3019              * and its unsigned, then don't do it because it could drop below 0.
3020              * https://issues.dlang.org/show_bug.cgi?id=16189
3021              */
3022             if ((einc.Eoper == OPminass || einc.EV.E2.Eoper == OPconst && el_tolong(einc.EV.E2) < 0) &&
3023                 (ref_.Eoper == OPlt || ref_.Eoper == OPle) &&
3024                 (tyuns(ref_.EV.E1.Ety) | tyuns(ref_.EV.E2.Ety)))
3025                 continue;
3026 
3027             /* If loop started out with a signed conditional that was
3028              * replaced with an unsigned one, don't do it if c2
3029              * is less than 0.
3030              */
3031             if (ref_.Nflags & NFLtouns && fl.c2.Eoper == OPconst)
3032             {
3033                 targ_llong c2 = el_tolong(fl.c2);
3034                 if (c2 < 0)
3035                     continue;
3036             }
3037 
3038             elem *refE2 = el_copytree(ref_.EV.E2);
3039             int refEoper = ref_.Eoper;
3040 
3041             /* if c1 < 0 and relop is < <= > >=
3042                then adjust relop as if both sides were multiplied
3043                by -1
3044              */
3045             if (!tyuns(ty) &&
3046                 (tyintegral(ty) && el_tolong(fl.c1) < 0 ||
3047                  tyfloating(ty) && el_toldoubled(fl.c1) < 0.0))
3048                 refEoper = swaprel(refEoper);
3049 
3050             /* Replace (X relop e) with (X relop (short)e)
3051                if T is 1 word but e is 2
3052              */
3053             if (tysize(flty) == SHORTSIZE &&
3054                 tysize(refE2.Ety) == LONGSIZE)
3055                 refE2 = el_una(OP32_16,flty,refE2);
3056 
3057             /* replace e with e*c1 + c2             */
3058             elem* C2 = el_copytree(fl.c2);
3059             elem* fofe = el_bin(OPadd,flty,
3060                             el_bin(OPmul,refE2.Ety,
3061                                     refE2,
3062                                     el_copytree(fl.c1)),
3063                             C2);
3064             fofe = doptelem(fofe,GOALvalue | GOALagain);    // fold any constants
3065 
3066             if (tyuns(flty) && refEoper == OPge &&
3067                 fofe.Eoper == OPconst && el_allbits(fofe, 0) &&
3068                 fl.c2.Eoper == OPconst && !el_allbits(fl.c2, 0))
3069             {
3070                 /* Don't do it if replacement will result in
3071                  * an unsigned T>=0 which will be an infinite loop.
3072                  */
3073                 el_free(fofe);
3074                 continue;
3075             }
3076 
3077             if (debugc) printf("Eliminating basic IV '%s'\n",X.Sident.ptr);
3078 
3079             debug if (debugc)
3080             {
3081                 printf("Comparison replaced: ");
3082                 WReqn(ref_);
3083                 printf(" with ");
3084             }
3085 
3086             el_free(ref_.EV.E2);
3087             ref_.EV.E2 = refE2;
3088             ref_.Eoper = cast(ubyte)refEoper;
3089 
3090             elimass(einc);          // dump the increment elem
3091 
3092             // replace X with T
3093             assert(ref_.EV.E1.EV.Voffset == 0);
3094             ref_.EV.E1.EV.Vsym = fl.FLtemp;
3095             ref_.EV.E1.Ety = flty;
3096             ref_.EV.E2 = fofe;
3097 
3098             /* If sizes of expression worked out wrong...
3099                Which can happen if we have (int)ptr==e
3100              */
3101             if (OTbinary(fofe.Eoper))         // if didn't optimize it away
3102             {
3103                 const tym_t fofety = fofe.Ety;
3104                 const int sz = tysize(fofety);
3105                 tym_t ty1 = fofe.EV.E1.Ety;
3106                 const tym_t ty2 = fofe.EV.E2.Ety;
3107                 /* Sizes of + expression must all be the same       */
3108                 if (sz != tysize(ty1) &&
3109                     sz != tysize(ty2)
3110                    )
3111                 {
3112                     if (tyuns(fofety))      // if unsigned comparison
3113                         ty1 = touns(ty1);   /* to unsigned type     */
3114                     fofe.Ety = ty1;
3115                     ref_.EV.E1.Ety = ty1;
3116                 }
3117             }
3118 
3119             /* Fix if leaves of compare are TYfptrs and the compare */
3120             /* operator is < <= > >=.                               */
3121             if (ref_.Eoper >= OPle && ref_.Eoper <= OPge && tyfv(ref_.EV.E1.Ety))
3122             {
3123                 assert(tyfv(ref_.EV.E2.Ety));
3124                 ref_.EV.E1 = el_una(OPoffset,TYuint,ref_.EV.E1);
3125                 ref_.EV.E2 = el_una(OPoffset,TYuint,fofe);
3126             }
3127 
3128             debug if (debugc)
3129             {
3130                 WReqn(ref_);
3131                 printf("\n");
3132             }
3133 
3134             go.changes++;
3135             doflow = true;                  /* redo flow analysis   */
3136 
3137             /* if X is live on entry to any successor S outside loop */
3138             /*      prepend elem X=(T-c2)/c1 to S.Belem     */
3139 
3140             for (uint i = 0; (i = cast(uint) vec_index(i, l.Lexit)) < dfo.length; ++i)  // for each exit block
3141             {
3142                 elem *ne;
3143                 block *b;
3144 
3145                 foreach (bl; ListRange(dfo[i].Bsucc))
3146                 {   /* for each successor   */
3147                     b = list_block(bl);
3148                     if (vec_testbit(b.Bdfoidx,l.Lloop))
3149                         continue;       /* inside loop  */
3150                     if (!vec_testbit(X.Ssymnum,b.Binlv))
3151                         continue;       /* not live     */
3152 
3153                     C2 = el_copytree(fl.c2);
3154                     ne = el_bin(OPmin,ty,
3155                             el_var(fl.FLtemp),
3156                             C2);
3157                     if (tybasic(ne.EV.E1.Ety) == TYfptr &&
3158                         tybasic(ne.EV.E2.Ety) == TYfptr)
3159                     {
3160                         ne.Ety = I64 ? TYllong : TYint;
3161                         if (tylong(ty) && _tysize[TYint] == 2)
3162                             ne = el_una(OPs16_32,ty,ne);
3163                     }
3164 
3165                     ne = el_bin(OPeq,X.ty(),
3166                             el_var(X),
3167                             el_bin(OPdiv,ne.Ety,
3168                                 ne,
3169                                 el_copytree(fl.c1)));
3170 
3171                     debug if (debugc)
3172                     {
3173                         printf("Adding (");
3174                         WReqn(ne);
3175                         printf(") to exit block B%d\n",b.Bdfoidx);
3176                         //elem_print(ne);
3177                     }
3178 
3179                     /* We have to add a new block if there is */
3180                     /* more than one predecessor to b.      */
3181                     if (list_next(b.Bpred))
3182                     {
3183                         block *bn = block_calloc();
3184                         bn.Btry = b.Btry;
3185                         numblks++;
3186                         assert(numblks <= maxblks);
3187                         bn.BC = BCgoto;
3188                         bn.Bnext = dfo[i].Bnext;
3189                         dfo[i].Bnext = bn;
3190                         list_append(&(bn.Bsucc),b);
3191                         list_append(&(bn.Bpred),dfo[i]);
3192                         bl.ptr = cast(void *)bn;
3193                         foreach (bl2; ListRange(b.Bpred))
3194                             if (list_block(bl2) == dfo[i])
3195                             {
3196                                 bl2.ptr = cast(void *)bn;
3197                                 goto L2;
3198                             }
3199                         assert(0);
3200                     L2:
3201                         b = bn;
3202                         addblk = true;
3203                     }
3204 
3205                     if (b.Belem)
3206                         b.Belem =
3207                             el_bin(OPcomma,b.Belem.Ety,
3208                                 ne,b.Belem);
3209                     else
3210                         b.Belem = ne;
3211                     go.changes++;
3212                     doflow = true;  /* redo flow analysis   */
3213                 } /* for each successor */
3214             } /* foreach exit block */
3215             if (addblk)
3216                 return;
3217         }
3218         else if (refcount == 0)                 /* if no uses of IV in loop  */
3219         {
3220             /* Eliminate the basic IV if it is not live on any successor */
3221             for (uint i = 0; (i = cast(uint) vec_index(i, l.Lexit)) < dfo.length; ++i)  // for each exit block
3222             {
3223                 foreach (bl; ListRange(dfo[i].Bsucc))
3224                 {   /* for each successor   */
3225                     block *b = list_block(bl);
3226                     if (vec_testbit(b.Bdfoidx,l.Lloop))
3227                         continue;       /* inside loop  */
3228                     if (vec_testbit(X.Ssymnum,b.Binlv))
3229                         goto L1;        /* live         */
3230                 }
3231             }
3232 
3233             if (debugc) printf("No uses, eliminating basic IV '%s' (%p)\n",X.Sident.ptr,X);
3234 
3235             /* Dump the increment elem                              */
3236             /* (Replace it with an OPconst that only serves as a    */
3237             /* placeholder in the tree)                             */
3238             *(biv.IVincr) = el_selecte2(*(biv.IVincr));
3239 
3240             go.changes++;
3241             doflow = true;                  /* redo flow analysis   */
3242           L1:
3243         }
3244     } /* for */
3245 }
3246 
3247 
3248 /***********************
3249  * Eliminate opeq IVs that are not used outside the loop.
3250  */
3251 
3252 private void elimopeqs(loop *l)
3253 {
3254     elem **pref;
3255     Symbol *X;
3256     int refcount;
3257 
3258     if (debugc) printf("elimopeqs(%p)\n",l);
3259     for (Iv *biv = l.Lopeqlist; biv; biv = biv.IVnext)    // for each opeq IV
3260     {
3261         // Can't eliminate this basic IV if we have a goal for the
3262         // increment elem.
3263         // Be careful about Nflags being in a union...
3264         if (!((*biv.IVincr).Nflags & NFLnogoal))
3265             continue;
3266 
3267         X = biv.IVbasic;
3268         assert(symbol_isintab(X));
3269         pref = onlyref(X,l,*biv.IVincr,&refcount);
3270 
3271         // if only ref of X is of the form (X) or (X relop e) or (e relop X)
3272         if (pref != null && refcount <= 1)
3273         { }
3274         else if (refcount == 0)                 // if no uses of IV in loop
3275         {   // Eliminate the basic IV if it is not live on any successor
3276             uint i;
3277             for (i = 0; (i = cast(uint) vec_index(i, l.Lexit)) < dfo.length; ++i)  // for each exit block
3278             {
3279                 foreach (bl; ListRange(dfo[i].Bsucc))
3280                 {   // for each successor
3281                     block *b = list_block(bl);
3282                     if (vec_testbit(b.Bdfoidx,l.Lloop))
3283                         continue;       // inside loop
3284                     if (vec_testbit(X.Ssymnum,b.Binlv))
3285                         goto L1;        // live
3286                 }
3287             }
3288 
3289             if (debugc) printf("No uses, eliminating opeq IV '%s' (%p)\n",X.Sident.ptr,X);
3290 
3291             // Dump the increment elem
3292             // (Replace it with an OPconst that only serves as a
3293             // placeholder in the tree)
3294             *(biv.IVincr) = el_selecte2(*(biv.IVincr));
3295 
3296             go.changes++;
3297             doflow = true;                      // redo flow analysis
3298         L1:
3299         }
3300     }
3301 }
3302 
3303 /**************************
3304  * Find simplest elem in family.
3305  * Input:
3306  *      tym     type of basic IV
3307  * Return null if none found.
3308  */
3309 
3310 private famlist * simfl(famlist *fl,tym_t tym)
3311 {
3312     famlist *sofar;
3313 
3314     assert(fl);
3315     sofar = null;
3316     for (; fl; fl = fl.FLnext)
3317     {
3318         if (fl.FLtemp == FLELIM)       /* no variable, so skip it      */
3319             continue;
3320         /* If size of replacement is less than size of biv, we could    */
3321         /* be in trouble due to loss of precision.                      */
3322         if (size(fl.FLtemp.ty()) < size(tym))
3323             continue;
3324         sofar = flcmp(sofar,fl);        /* pick simplest                */
3325     }
3326     return sofar;
3327 }
3328 
3329 /**************************
3330  * Return simpler of two family elems.
3331  * There is room for improvement, namely if
3332  *      f1.c1 = 2, f2.c1 = 27
3333  * then pick f1 because it is a shift.
3334  */
3335 
3336 private famlist * flcmp(famlist *f1,famlist *f2)
3337 {
3338     tym_t ty;
3339     eve *t1;
3340     eve *t2;
3341 
3342     assert(f2);
3343     if (!f1)
3344         goto Lf2;
3345     t1 = &(f1.c1.EV);
3346     t2 = &(f2.c1.EV);
3347     ty = (*f1.FLpelem).Ety;           /* type of elem                 */
3348 
3349     static if (0)
3350     {
3351         printf("f1: c1 = %d, c2 = %d\n",t1.Vshort,f1.c2.EV.Vshort);
3352         printf("f2: c1 = %d, c2 = %d\n",t2.Vshort,f2.c2.EV.Vshort);
3353         WRTYxx((*f1.FLpelem).Ety);
3354         WRTYxx((*f2.FLpelem).Ety);
3355     }
3356 
3357     /* Wimp out and just pick f1 if the types don't match               */
3358     if (tysize(ty) == tysize((*f2.FLpelem).Ety))
3359     {
3360         switch (tybasic(ty))
3361         {   case TYbool:
3362             case TYchar:
3363             case TYschar:
3364             case TYuchar:
3365                 if (t2.Vuchar == 1 ||
3366                     t1.Vuchar != 1 && f2.c2.EV.Vuchar == 0)
3367                         goto Lf2;
3368                 break;
3369 
3370             case TYshort:
3371             case TYushort:
3372             case TYchar16:
3373             case TYwchar_t:     // BUG: what about 4 byte wchar_t's?
3374             case_short:
3375                 if (t2.Vshort == 1 ||
3376                     t1.Vshort != 1 && f2.c2.EV.Vshort == 0)
3377                         goto Lf2;
3378                 break;
3379 
3380             case TYsptr:
3381             case TYcptr:
3382             case TYnptr:        // BUG: 64 bit pointers?
3383             case TYimmutPtr:
3384             case TYsharePtr:
3385             case TYrestrictPtr:
3386             case TYfgPtr:
3387             case TYnullptr:
3388             case TYint:
3389             case TYuint:
3390                 if (_tysize[TYint] == SHORTSIZE)
3391                     goto case_short;
3392                 else
3393                     goto case_long;
3394 
3395             case TYlong:
3396             case TYulong:
3397             case TYdchar:
3398             case TYfptr:
3399             case TYvptr:
3400             case TYhptr:
3401             case_long:
3402                 if (t2.Vlong == 1 ||
3403                     t1.Vlong != 1 && f2.c2.EV.Vlong == 0)
3404                         goto Lf2;
3405                 break;
3406 
3407             case TYfloat:
3408                 if (t2.Vfloat == 1 ||
3409                     t1.Vfloat != 1 && f2.c2.EV.Vfloat == 0)
3410                         goto Lf2;
3411                 break;
3412 
3413             case TYdouble:
3414             case TYdouble_alias:
3415                 if (t2.Vdouble == 1.0 ||
3416                     t1.Vdouble != 1.0 && f2.c2.EV.Vdouble == 0)
3417                         goto Lf2;
3418                 break;
3419 
3420             case TYldouble:
3421                 if (t2.Vldouble == 1.0 ||
3422                     t1.Vldouble != 1.0 && f2.c2.EV.Vldouble == 0)
3423                         goto Lf2;
3424                 break;
3425 
3426             case TYllong:
3427             case TYullong:
3428                 if (t2.Vllong == 1 ||
3429                     t1.Vllong != 1 && f2.c2.EV.Vllong == 0)
3430                         goto Lf2;
3431                 break;
3432 
3433             default:
3434                 assert(0);
3435         }
3436     }
3437     //printf("picking f1\n");
3438     return f1;
3439 
3440 Lf2:
3441     //printf("picking f2\n");
3442     return f2;
3443 }
3444 
3445 /************************************
3446  * Input:
3447  *      x       basic IV symbol
3448  *      incn    increment elem for basic IV X.
3449  * Output:
3450  *      *prefcount      # of references to X other than the increment elem
3451  * Returns:
3452  *      If ref of X in loop l is of the form (X relop e) or (e relop X)
3453  *              Return the relop elem
3454  *      Else
3455  *              Return null
3456  */
3457 
3458 private __gshared
3459 {
3460     int count;
3461     elem **nd;
3462     elem *sincn;
3463     Symbol *X;
3464 }
3465 
3466 private elem ** onlyref(Symbol *x,loop *l,elem *incn,int *prefcount)
3467 {
3468     uint i;
3469 
3470     //printf("onlyref('%s')\n", x.Sident.ptr);
3471     X = x;                                /* save some parameter passing  */
3472     assert(symbol_isintab(x));
3473     sincn = incn;
3474 
3475     debug
3476       if (!(X.Ssymnum < globsym.top && l && incn))
3477           printf("X = %d, globsym.top = %d, l = %p, incn = %p\n",X.Ssymnum,globsym.top,l,incn);
3478 
3479     assert(X.Ssymnum < globsym.top && l && incn);
3480     count = 0;
3481     nd = null;
3482     for (i = 0; (i = cast(uint) vec_index(i, l.Lloop)) < dfo.length; ++i)  // for each block in loop
3483     {
3484         block *b;
3485 
3486         b = dfo[i];
3487         if (b.Belem)
3488         {
3489             countrefs(&b.Belem,b.BC == BCiftrue);
3490         }
3491     }
3492 
3493     static if (0)
3494     {
3495         printf("count = %d, nd = (");
3496         if (nd) WReqn(*nd);
3497         printf(")\n");
3498     }
3499 
3500     *prefcount = count;
3501     return nd;
3502 }
3503 
3504 
3505 /******************************
3506  * Count elems of the form (X relop e) or (e relop X).
3507  * Do not count the node if it is the increment node (sincn).
3508  * Input:
3509  *      flag:   true if block wants to test the elem
3510  */
3511 
3512 private void countrefs(elem **pn,bool flag)
3513 {
3514     elem *n = *pn;
3515 
3516     assert(n);
3517     if (n == sincn)                       /* if it is the increment elem  */
3518     {
3519         if (OTbinary(n.Eoper))
3520             countrefs(&n.EV.E2, false);
3521         return;                         // don't count lvalue
3522     }
3523     if (OTunary(n.Eoper))
3524         countrefs(&n.EV.E1,false);
3525     else if (OTbinary(n.Eoper))
3526     {
3527         if (OTrel(n.Eoper))
3528         {
3529             elem *e1 = n.EV.E1;
3530 
3531             assert(e1.Eoper != OPcomma);
3532             if (e1 == sincn &&
3533                 (e1.Eoper == OPeq || OTopeq(e1.Eoper)))
3534                 goto L1;
3535 
3536             /* Check both subtrees to see if n is the comparison node,
3537              * that is, if X is a leaf of the comparison.
3538              */
3539             if (e1.Eoper == OPvar && e1.EV.Vsym == X && !countrefs2(n.EV.E2) ||
3540                 n.EV.E2.Eoper == OPvar && n.EV.E2.EV.Vsym == X && !countrefs2(e1))
3541                 nd = pn;                /* found the relop node */
3542         }
3543     L1:
3544         countrefs(&n.EV.E1,false);
3545         countrefs(&n.EV.E2,(flag && n.Eoper == OPcomma));
3546     }
3547     else if ((n.Eoper == OPvar || n.Eoper == OPrelconst) && n.EV.Vsym == X)
3548     {
3549         if (flag)
3550             nd = pn;                    /* comparing it with 0          */
3551         count++;                        /* found another reference      */
3552     }
3553 }
3554 
3555 /*******************************
3556  * Count number of times symbol X appears in elem tree e.
3557  */
3558 
3559 private int countrefs2(elem *e)
3560 {
3561     elem_debug(e);
3562     while (OTunary(e.Eoper))
3563         e = e.EV.E1;
3564     if (OTbinary(e.Eoper))
3565         return countrefs2(e.EV.E1) + countrefs2(e.EV.E2);
3566     return ((e.Eoper == OPvar || e.Eoper == OPrelconst) &&
3567             e.EV.Vsym == X);
3568 }
3569 
3570 /****************************
3571  * Eliminate some special cases.
3572  */
3573 
3574 private void elimspec(loop *l)
3575 {
3576     uint i;
3577 
3578     for (i = 0; (i = cast(uint) vec_index(i, l.Lloop)) < dfo.length; ++i)  // for each block in loop
3579     {
3580         block *b;
3581 
3582         b = dfo[i];
3583         if (b.Belem)
3584             elimspecwalk(&b.Belem);
3585     }
3586 }
3587 
3588 
3589 /******************************
3590  */
3591 
3592 private void elimspecwalk(elem **pn)
3593 {
3594     elem *n;
3595 
3596     n = *pn;
3597     assert(n);
3598     if (OTunary(n.Eoper))
3599         elimspecwalk(&n.EV.E1);
3600     else if (OTbinary(n.Eoper))
3601     {
3602         elimspecwalk(&n.EV.E1);
3603         elimspecwalk(&n.EV.E2);
3604         if (OTrel(n.Eoper))
3605         {
3606             elem *e1 = n.EV.E1;
3607 
3608             /* Replace ((e1,e2) rel e3) with (e1,(e2 rel e3).
3609              * This will reduce the number of cases for elimbasivs().
3610              * Don't do equivalent with (e1 rel (e2,e3)) because
3611              * of potential side effects in e1.
3612              */
3613             if (e1.Eoper == OPcomma)
3614             {
3615                 elem *e;
3616 
3617                 debug if (debugc)
3618                 {   printf("3rewriting ("); WReqn(n); printf(")\n"); }
3619 
3620                 e = n.EV.E2;
3621                 n.EV.E2 = e1;
3622                 n.EV.E1 = n.EV.E2.EV.E1;
3623                 n.EV.E2.EV.E1 = n.EV.E2.EV.E2;
3624                 n.EV.E2.EV.E2 = e;
3625                 n.EV.E2.Eoper = n.Eoper;
3626                 n.EV.E2.Ety = n.Ety;
3627                 n.Eoper = OPcomma;
3628 
3629                 go.changes++;
3630                 doflow = true;
3631 
3632                 elimspecwalk(&n.EV.E1);
3633                 elimspecwalk(&n.EV.E2);
3634             }
3635 
3636             /* Rewrite ((X op= e2) rel e3) into ((X op= e2),(X rel e3))
3637              * Rewrite ((X ++  e2) rel e3) into ((X +=  e2),(X-e2 rel e3))
3638              * so that the op= will not have a goal, so elimbasivs()
3639              * will work on it.
3640              */
3641             if ((OTopeq(e1.Eoper)
3642                  || OTpost(e1.Eoper)
3643                 ) &&
3644                 !el_sideeffect(e1.EV.E1))
3645             {
3646                 elem *e;
3647                 OPER op;
3648 
3649                 debug if (debugc)
3650                 {   printf("4rewriting ("); WReqn(n); printf(")\n"); }
3651 
3652                 e = el_calloc();
3653                 el_copy(e,n);
3654                 e.EV.E1 = el_copytree(e1.EV.E1);
3655                 e.EV.E1.Ety = n.EV.E1.Ety;
3656                 n.EV.E2 = e;
3657                 switch (e1.Eoper)
3658                 {
3659                     case OPpostinc:
3660                         e1.Eoper = OPaddass;
3661                         op = OPmin;
3662                         goto L3;
3663 
3664                     case OPpostdec:
3665                         e1.Eoper = OPminass;
3666                         op = OPadd;
3667                     L3: e.EV.E1 = el_bin(op,e.EV.E1.Ety,e.EV.E1,el_copytree(e1.EV.E2));
3668                         break;
3669 
3670                     default:
3671                         break;
3672                 }
3673                 /* increment node is now guaranteed to have no goal */
3674                 e1.Nflags |= NFLnogoal;
3675                 n.Eoper = OPcomma;
3676                 //go.changes++;
3677                 doflow = true;
3678 
3679                 elimspecwalk(&n.EV.E1);
3680                 elimspecwalk(&n.EV.E2);
3681             }
3682         }
3683   }
3684 }
3685 
3686 /*********************************
3687  * Unroll loop if possible.
3688  * Params:
3689  *      l = loop to unroll
3690  * Returns:
3691  *      true if loop was unrolled
3692  */
3693 
3694 struct UnrollWalker
3695 {
3696 nothrow:
3697     uint defnum;
3698     int state;
3699     Symbol *v;
3700     targ_llong increment;
3701 
3702     /***********************************
3703      * Walk e in execution order, fixing it according to state.
3704      * state == 0: when rdinc is found, remove it, advance to state 1
3705      * state == 1: continue, replacing instances of v with v+increment,
3706      *             when second rdinc is found, advance to state 2
3707      * state == 2: continue
3708      */
3709 
3710     void walker(elem *e)
3711     {
3712         assert(e);
3713         const op = e.Eoper;
3714         if (ERTOL(e))
3715         {
3716             if (e.Edef != defnum)
3717             {
3718                 walker(e.EV.E2);
3719                 walker(e.EV.E1);
3720             }
3721         }
3722         else if (OTbinary(op))
3723         {
3724             if (e.Edef != defnum)
3725             {
3726                 walker(e.EV.E1);
3727                 walker(e.EV.E2);
3728             }
3729         }
3730         else if (OTunary(op))
3731         {
3732             assert(e.Edef != defnum);
3733             walker(e.EV.E1);
3734         }
3735         else if (op == OPvar &&
3736                  state == 1 &&
3737                  e.EV.Vsym == v)
3738         {
3739             // overwrite e with (v+increment)
3740             elem *e1 = el_calloc();
3741             el_copy(e1,e);
3742             e.Eoper = OPadd;
3743             e.EV.E1 = e1;
3744             e.EV.E2 = el_long(e.Ety, increment);
3745         }
3746         if (OTdef(op) && e.Edef == defnum)
3747         {
3748             switch (state)
3749             {
3750                 case 0:
3751                     el_free(e.EV.E1);
3752                     el_free(e.EV.E2);
3753                     e.Eoper = OPconst;
3754                     e.EV.Vllong = 0;
3755                     break;
3756 
3757                 case 1:
3758                     break;
3759 
3760                 default:
3761                     assert(0);
3762             }
3763             ++state;
3764         }
3765     }
3766 }
3767 
3768 
3769 bool loopunroll(loop *l)
3770 {
3771     const bool log = false;
3772     if (log) printf("loopunroll(%p)\n", l);
3773 
3774     /* Do not repeatedly unroll the same loop,
3775      * or waste time attempting to
3776      */
3777     if (l.Lhead.Bflags & BFLnounroll)
3778         return false;
3779     l.Lhead.Bflags |= BFLnounroll;
3780 
3781     /* For simplification, only unroll loops that consist only
3782      * of a head and tail, and the tail is the exit block.
3783      */
3784     int numblocks = 0;
3785     int i;
3786     for (i = 0; (i = cast(uint) vec_index(i, l.Lloop)) < dfo.length; ++i)  // for each block in loop
3787         ++numblocks;
3788     if (numblocks != 2)
3789     {
3790         if (log) printf("\tnot 2 blocks\n");
3791         return false;
3792     }
3793     assert(l.Lhead != l.Ltail);
3794 
3795     /* tail must be the sole exit block
3796      */
3797     if (vec_testbit(l.Lhead.Bdfoidx, l.Lexit) ||
3798         !vec_testbit(l.Ltail.Bdfoidx, l.Lexit))
3799     {
3800         if (log) printf("\ttail not sole exit block\n");
3801         return false;
3802     }
3803 
3804     elem *ehead = l.Lhead.Belem;
3805     elem *etail = l.Ltail.Belem;
3806 
3807     if (log)
3808     {
3809         printf("Unroll candidate:\n");
3810         printf("  head:\t"); WReqn(l.Lhead.Belem); printf("\n");
3811         printf("  tail:\t"); WReqn(l.Ltail.Belem); printf("\n");
3812     }
3813 
3814     /* Tail must be of the form: (v < c) where v is an unsigned integer
3815      */
3816     if (etail.Eoper != OPlt ||
3817         etail.EV.E1.Eoper != OPvar ||
3818         etail.EV.E2.Eoper != OPconst)
3819     {
3820         if (log) printf("\tnot (v < c)\n");
3821         return false;
3822     }
3823 
3824     elem *e1 = etail.EV.E1;
3825     elem *e2 = etail.EV.E2;
3826 
3827     if (!tyintegral(e1.Ety) ||
3828         tysize(e1.Ety) > targ_llong.sizeof ||
3829         !(tyuns(e1.Ety) || tyuns(e2.Ety)))
3830     {
3831         if (log) printf("\tnot (integral unsigned)\n");
3832         return false;
3833     }
3834 
3835 //    extern int el_length(elem *e);
3836     int cost = el_length(ehead);
3837     //printf("test4 cost: %d\n", cost);
3838 
3839     if (cost > 100)
3840     {
3841         if (log) printf("\tcost %d\n", cost);
3842         return false;
3843     }
3844 
3845     Symbol* v = e1.EV.Vsym;
3846 
3847     // RD info is only reliable for registers and autos
3848     if (!(sytab[v.Sclass] & SCRD) || !(v.Sflags & SFLunambig))
3849     {
3850         if (log) printf("\tnot SCRD\n");
3851         return false;
3852     }
3853 
3854     /* Find the initial, increment elem, and final value of s
3855      */
3856     elem *einitial;
3857     elem *eincrement;
3858     if (!findloopparameters(etail, einitial, eincrement))
3859     {
3860         if (log) printf("\tnot findloopparameters()\n");
3861         return false;
3862     }
3863 
3864     targ_llong initial = el_tolong(einitial.EV.E2);
3865     targ_llong increment = el_tolong(eincrement.EV.E2);
3866     if (eincrement.Eoper == OPpostdec || eincrement.Eoper == OPminass)
3867         increment = -increment;
3868     targ_llong final_ = el_tolong(e2);
3869 
3870     if (log) printf("initial = %lld, increment = %lld, final = %lld\n",cast(long)initial,cast(long)increment,cast(long)final_);
3871 
3872     if (initial < 0 ||
3873         final_ < initial ||
3874         increment <= 0 ||
3875         (final_ - initial) % increment)
3876     {
3877         if (log) printf("\tnot (evenly divisible)\n");
3878         return false;
3879     }
3880 
3881     /* If loop would only execute once anyway, just remove the test at the end
3882      */
3883     if (initial + increment == final_)
3884     {
3885         if (log) printf("\tjust once\n");
3886         etail.Eoper = OPcomma;
3887         e2.EV.Vllong = 0;
3888         e2.Ety = etail.Ety;
3889         return false;
3890     }
3891 
3892     /* Unroll once
3893      */
3894     if ((final_ - initial) % 2)
3895     {
3896         if (log) printf("\tnot (divisible by 2)\n");
3897         return false;
3898     }
3899 
3900     if (log) printf("Unrolling starting\n");
3901 
3902     // Double the increment
3903     eincrement.EV.E2.EV.Vllong *= 2;
3904     //printf("  4head:\t"); WReqn(l.Lhead.Belem); printf("\n");
3905 
3906     elem *esecond = el_copytree(ehead);
3907     elem *e = el_combine(ehead, esecond);
3908 
3909     /* Walk e in execution order.
3910      * When eincrement is found, remove it.
3911      * Continue, replacing instances of `v` with `v+increment`
3912      * When second eincrement is found, stop.
3913      */
3914     UnrollWalker uw;
3915     uw.defnum = eincrement.Edef;
3916     uw.state = 0;
3917     uw.v = v;
3918     uw.increment = increment;
3919     uw.walker(e);
3920     assert(uw.state == 2);
3921 
3922     l.Lhead.Belem = e;
3923 
3924     /* If unrolled loop would only execute once anyway, just remove the test at the end
3925      */
3926     if (initial + 2 * increment == final_)
3927     {
3928         if (log) printf("\tjust twice\n");
3929         etail.Eoper = OPcomma;
3930         e2.EV.Vllong = 0;
3931         e2.Ety = etail.Ety;
3932     }
3933 
3934     //WRfunc();
3935     return true;
3936 }
3937 
3938 /******************************
3939  * Count number of elems in a tree
3940  * Params:
3941  *  e = tree
3942  * Returns:
3943  *  number of elems in tree
3944  */
3945 int el_length(elem *e)
3946 {
3947     int n = 0;
3948     while (e)
3949     {
3950         n += 1;
3951         if (!OTleaf(e.Eoper))
3952         {
3953             n += el_length(e.EV.E2);
3954             e = e.EV.E1;
3955         }
3956         else
3957             break;
3958     }
3959     return n;
3960 }
3961 
3962 
3963 }