1 /** 2 * Compiler implementation of the 3 * $(LINK2 http://www.dlang.org, D programming language). 4 * 5 * Manage the memory allocated on the runtime stack to save Common Subexpressions (CSE). 6 * 7 * Copyright: Copyright (C) 1985-1998 by Symantec 8 * Copyright (C) 2000-2021 by The D Language Foundation, All Rights Reserved 9 * Authors: $(LINK2 http://www.digitalmars.com, Walter Bright) 10 * License: $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0) 11 * Source: $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/backend/cgcse.d, backend/cgcse.d) 12 * Coverage: https://codecov.io/gh/dlang/dmd/src/master/src/dmd/backend/cgcse.d 13 */ 14 15 module dmd.backend.cgcse; 16 17 version (SCPP) 18 version = COMPILE; 19 version (MARS) 20 version = COMPILE; 21 22 version (COMPILE) 23 { 24 25 import core.stdc.stdio; 26 import core.stdc.stdlib; 27 import core.stdc.string; 28 29 import dmd.backend.cc; 30 import dmd.backend.cdef; 31 import dmd.backend.code; 32 import dmd.backend.code_x86; 33 import dmd.backend.el; 34 import dmd.backend.global; 35 import dmd.backend.ty; 36 37 import dmd.backend.barray; 38 39 extern (C++): 40 nothrow: 41 42 43 /**************************** 44 * Table of common subexpressions stored on the stack. 45 * csextab[] array of info on saved CSEs 46 * CSEpe pointer to saved elem 47 * CSEregm mask of register that was saved (so for multi- 48 * register variables we know which part we have) 49 */ 50 51 enum CSEload = 1; // set if the CSE was ever loaded 52 enum CSEsimple = 2; // CSE can be regenerated easily 53 54 struct CSE 55 { 56 elem* e; // pointer to elem 57 code csimple; // if CSEsimple, this is the code to regenerate it 58 regm_t regm; // mask of register stored there 59 int slot; // slot number 60 ubyte flags; // flag bytes 61 62 nothrow: 63 64 /************************ 65 * Initialize at function entry. 66 */ 67 static void initialize() 68 { 69 csextab.setLength(64); // reserve some space 70 } 71 72 /************************ 73 * Start for generating code for this function. 74 * After ending generation, call finish(). 75 */ 76 static void start() 77 { 78 csextab.setLength(0); // no entries in table yet 79 slotSize = REGSIZE; 80 alignment_ = REGSIZE; 81 } 82 83 /******************************* 84 * Create and add a new CSE entry. 85 * Returns: 86 * pointer to created entry 87 */ 88 static CSE* add() 89 { 90 foreach (ref cse; csextab) 91 { 92 if (cse.e == null) // can share with previously used one 93 { 94 cse.flags &= CSEload; 95 return &cse; 96 } 97 } 98 99 // create new one 100 const slot = cast(int)csextab.length; 101 CSE cse; 102 cse.slot = slot; 103 csextab.push(cse); 104 return &csextab[slot]; 105 } 106 107 /******************************** 108 * Update slot size and alignment to worst case. 109 * 110 * A bit wasteful of stack space. 111 * Params: e = elem with a size and an alignment 112 */ 113 static void updateSizeAndAlign(elem* e) 114 { 115 if (I16) 116 return; 117 const sz = tysize(e.Ety); 118 if (slotSize < sz) 119 slotSize = sz; 120 const alignsize = el_alignsize(e); 121 122 static if (0) 123 printf("set slot size = %d, sz = %d, al = %d, ty = x%x, %s\n", 124 slotSize, cast(int)sz, cast(int)alignsize, 125 cast(int)tybasic(e.Ety), funcsym_p.Sident.ptr); 126 127 if (alignsize >= 16 && TARGET_STACKALIGN >= 16) 128 { 129 alignment_ = alignsize; 130 STACKALIGN = alignsize; 131 enforcealign = true; 132 } 133 } 134 135 /**** 136 * Get range of all CSEs filtered by matching `e`, 137 * starting with most recent. 138 * Params: e = elem to match 139 * Returns: 140 * input range 141 */ 142 static auto filter(const elem* e) 143 { 144 struct Range 145 { 146 const elem* e; 147 int i; 148 149 nothrow: 150 151 bool empty() 152 { 153 while (i) 154 { 155 if (csextab[i - 1].e == e) 156 return false; 157 --i; 158 } 159 return true; 160 } 161 162 ref CSE front() { return csextab[i - 1]; } 163 164 void popFront() { --i; } 165 } 166 167 return Range(e, cast(int)csextab.length); 168 } 169 170 /********************* 171 * Remove instances of `e` from CSE table. 172 * Params: e = elem to remove 173 */ 174 static void remove(const elem* e) 175 { 176 foreach (ref cse; csextab[]) 177 { 178 if (cse.e == e) 179 cse.e = null; 180 } 181 } 182 183 /************************ 184 * Create mask of registers from CSEs that refer to `e`. 185 * Params: e = elem to match 186 * Returns: 187 * mask 188 */ 189 static regm_t mask(const elem* e) 190 { 191 regm_t result = 0; 192 foreach (ref cse; csextab[]) 193 { 194 if (cse.e) 195 elem_debug(cse.e); 196 if (cse.e == e) 197 result |= cse.regm; 198 } 199 return result; 200 } 201 202 /*** 203 * Finish generating code for this function. 204 * 205 * Get rid of unused cse temporaries by shrinking the array. 206 * References: loaded() 207 */ 208 static void finish() 209 { 210 while (csextab.length != 0 && (csextab[csextab.length - 1].flags & CSEload) == 0) 211 csextab.setLength(csextab.length - 1); 212 } 213 214 /**** The rest of the functions can be called only after finish() ****/ 215 216 /****************** 217 * Returns: 218 * total size used by CSE's 219 */ 220 static uint size() 221 { 222 return cast(uint)csextab.length * CSE.slotSize; 223 } 224 225 /********************* 226 * Returns: 227 * alignment needed for CSE region of the stack 228 */ 229 static uint alignment() 230 { 231 return alignment_; 232 } 233 234 /// Returns: offset of slot i from start of CSE region 235 static uint offset(int i) 236 { 237 return i * slotSize; 238 } 239 240 /// Returns: true if CSE was ever loaded 241 static bool loaded(int i) 242 { 243 return i < csextab.length && // array could be shrunk for non-CSEload entries 244 (csextab[i].flags & CSEload); 245 } 246 247 private: 248 __gshared: 249 Barray!CSE csextab; // CSE table (allocated for each function) 250 uint slotSize; // size of each slot in table 251 uint alignment_; // alignment for the table 252 } 253 254 255 /******************** 256 * The above implementation of CSE is inefficient: 257 * 1. the optimization to not store CSE's that are never reloaded is based on the slot number, 258 * not the CSE. This means that when a slot is shared among multiple CSEs, it is treated 259 * as "reloaded" even if only one of the CSEs in that slot is reloaded. 260 * 2. updateSizeAndAlign should only be run when reloading when (1) is fixed. 261 * 3. all slots are aligned to worst case alignment of any slot. 262 * 4. unused slots still get memory allocated to them if they aren't at the end of the 263 * slot table. 264 * 265 * The slot number should be unique to each CSE, and allocation of actual slots should be 266 * done after the code is generated, not during generation. 267 */ 268 269 270 }