Mercurial > hg > CbC > CbC_gcc
comparison gcc/config/i386/driver-i386.c @ 0:a06113de4d67
first commit
author | kent <kent@cr.ie.u-ryukyu.ac.jp> |
---|---|
date | Fri, 17 Jul 2009 14:47:48 +0900 |
parents | |
children | 58ad6c70ea60 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:a06113de4d67 |
---|---|
1 /* Subroutines for the gcc driver. | |
2 Copyright (C) 2006, 2007, 2008 Free Software Foundation, Inc. | |
3 | |
4 This file is part of GCC. | |
5 | |
6 GCC is free software; you can redistribute it and/or modify | |
7 it under the terms of the GNU General Public License as published by | |
8 the Free Software Foundation; either version 3, or (at your option) | |
9 any later version. | |
10 | |
11 GCC is distributed in the hope that it will be useful, | |
12 but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
14 GNU General Public License for more details. | |
15 | |
16 You should have received a copy of the GNU General Public License | |
17 along with GCC; see the file COPYING3. If not see | |
18 <http://www.gnu.org/licenses/>. */ | |
19 | |
20 #include "config.h" | |
21 #include "system.h" | |
22 #include "coretypes.h" | |
23 #include "tm.h" | |
24 #include <stdlib.h> | |
25 | |
26 const char *host_detect_local_cpu (int argc, const char **argv); | |
27 | |
28 #ifdef __GNUC__ | |
29 #include "cpuid.h" | |
30 | |
/* Parameters of a single cache level as filled in by the detection
   routines in this file.  */
struct cache_desc
{
  unsigned int sizekb;  /* Cache size, in kilobytes.  */
  unsigned int assoc;   /* Associativity (number of ways).  */
  unsigned int line;    /* Cache line size.  */
};
37 | |
38 /* Returns command line parameters that describe size and | |
39 cache line size of the processor caches. */ | |
40 | |
41 static char * | |
42 describe_cache (struct cache_desc level1, struct cache_desc level2) | |
43 { | |
44 char size[100], line[100], size2[100]; | |
45 | |
46 /* At the moment, gcc does not use the information | |
47 about the associativity of the cache. */ | |
48 | |
49 sprintf (size, "--param l1-cache-size=%u", level1.sizekb); | |
50 sprintf (line, "--param l1-cache-line-size=%u", level1.line); | |
51 | |
52 sprintf (size2, "--param l2-cache-size=%u", level2.sizekb); | |
53 | |
54 return concat (size, " ", line, " ", size2, " ", NULL); | |
55 } | |
56 | |
57 /* Detect L2 cache parameters using CPUID extended function 0x80000006. */ | |
58 | |
59 static void | |
60 detect_l2_cache (struct cache_desc *level2) | |
61 { | |
62 unsigned eax, ebx, ecx, edx; | |
63 unsigned assoc; | |
64 | |
65 __cpuid (0x80000006, eax, ebx, ecx, edx); | |
66 | |
67 level2->sizekb = (ecx >> 16) & 0xffff; | |
68 level2->line = ecx & 0xff; | |
69 | |
70 assoc = (ecx >> 12) & 0xf; | |
71 if (assoc == 6) | |
72 assoc = 8; | |
73 else if (assoc == 8) | |
74 assoc = 16; | |
75 else if (assoc >= 0xa && assoc <= 0xc) | |
76 assoc = 32 + (assoc - 0xa) * 16; | |
77 else if (assoc >= 0xd && assoc <= 0xe) | |
78 assoc = 96 + (assoc - 0xd) * 32; | |
79 | |
80 level2->assoc = assoc; | |
81 } | |
82 | |
83 /* Returns the description of caches for an AMD processor. */ | |
84 | |
85 static const char * | |
86 detect_caches_amd (unsigned max_ext_level) | |
87 { | |
88 unsigned eax, ebx, ecx, edx; | |
89 | |
90 struct cache_desc level1, level2 = {0, 0, 0}; | |
91 | |
92 if (max_ext_level < 0x80000005) | |
93 return ""; | |
94 | |
95 __cpuid (0x80000005, eax, ebx, ecx, edx); | |
96 | |
97 level1.sizekb = (ecx >> 24) & 0xff; | |
98 level1.assoc = (ecx >> 16) & 0xff; | |
99 level1.line = ecx & 0xff; | |
100 | |
101 if (max_ext_level >= 0x80000006) | |
102 detect_l2_cache (&level2); | |
103 | |
104 return describe_cache (level1, level2); | |
105 } | |
106 | |
107 /* Decodes the size, the associativity and the cache line size of | |
108 L1/L2 caches of an Intel processor. Values are based on | |
109 "Intel Processor Identification and the CPUID Instruction" | |
110 [Application Note 485], revision -032, December 2007. */ | |
111 | |
112 static void | |
113 decode_caches_intel (unsigned reg, bool xeon_mp, | |
114 struct cache_desc *level1, struct cache_desc *level2) | |
115 { | |
116 int i; | |
117 | |
118 for (i = 24; i >= 0; i -= 8) | |
119 switch ((reg >> i) & 0xff) | |
120 { | |
121 case 0x0a: | |
122 level1->sizekb = 8; level1->assoc = 2; level1->line = 32; | |
123 break; | |
124 case 0x0c: | |
125 level1->sizekb = 16; level1->assoc = 4; level1->line = 32; | |
126 break; | |
127 case 0x2c: | |
128 level1->sizekb = 32; level1->assoc = 8; level1->line = 64; | |
129 break; | |
130 case 0x39: | |
131 level2->sizekb = 128; level2->assoc = 4; level2->line = 64; | |
132 break; | |
133 case 0x3a: | |
134 level2->sizekb = 192; level2->assoc = 6; level2->line = 64; | |
135 break; | |
136 case 0x3b: | |
137 level2->sizekb = 128; level2->assoc = 2; level2->line = 64; | |
138 break; | |
139 case 0x3c: | |
140 level2->sizekb = 256; level2->assoc = 4; level2->line = 64; | |
141 break; | |
142 case 0x3d: | |
143 level2->sizekb = 384; level2->assoc = 6; level2->line = 64; | |
144 break; | |
145 case 0x3e: | |
146 level2->sizekb = 512; level2->assoc = 4; level2->line = 64; | |
147 break; | |
148 case 0x41: | |
149 level2->sizekb = 128; level2->assoc = 4; level2->line = 32; | |
150 break; | |
151 case 0x42: | |
152 level2->sizekb = 256; level2->assoc = 4; level2->line = 32; | |
153 break; | |
154 case 0x43: | |
155 level2->sizekb = 512; level2->assoc = 4; level2->line = 32; | |
156 break; | |
157 case 0x44: | |
158 level2->sizekb = 1024; level2->assoc = 4; level2->line = 32; | |
159 break; | |
160 case 0x45: | |
161 level2->sizekb = 2048; level2->assoc = 4; level2->line = 32; | |
162 break; | |
163 case 0x49: | |
164 if (xeon_mp) | |
165 break; | |
166 level2->sizekb = 4096; level2->assoc = 16; level2->line = 64; | |
167 break; | |
168 case 0x4e: | |
169 level2->sizekb = 6144; level2->assoc = 24; level2->line = 64; | |
170 break; | |
171 case 0x60: | |
172 level1->sizekb = 16; level1->assoc = 8; level1->line = 64; | |
173 break; | |
174 case 0x66: | |
175 level1->sizekb = 8; level1->assoc = 4; level1->line = 64; | |
176 break; | |
177 case 0x67: | |
178 level1->sizekb = 16; level1->assoc = 4; level1->line = 64; | |
179 break; | |
180 case 0x68: | |
181 level1->sizekb = 32; level1->assoc = 4; level1->line = 64; | |
182 break; | |
183 case 0x78: | |
184 level2->sizekb = 1024; level2->assoc = 4; level2->line = 64; | |
185 break; | |
186 case 0x79: | |
187 level2->sizekb = 128; level2->assoc = 8; level2->line = 64; | |
188 break; | |
189 case 0x7a: | |
190 level2->sizekb = 256; level2->assoc = 8; level2->line = 64; | |
191 break; | |
192 case 0x7b: | |
193 level2->sizekb = 512; level2->assoc = 8; level2->line = 64; | |
194 break; | |
195 case 0x7c: | |
196 level2->sizekb = 1024; level2->assoc = 8; level2->line = 64; | |
197 break; | |
198 case 0x7d: | |
199 level2->sizekb = 2048; level2->assoc = 8; level2->line = 64; | |
200 break; | |
201 case 0x7f: | |
202 level2->sizekb = 512; level2->assoc = 2; level2->line = 64; | |
203 break; | |
204 case 0x82: | |
205 level2->sizekb = 256; level2->assoc = 8; level2->line = 32; | |
206 break; | |
207 case 0x83: | |
208 level2->sizekb = 512; level2->assoc = 8; level2->line = 32; | |
209 break; | |
210 case 0x84: | |
211 level2->sizekb = 1024; level2->assoc = 8; level2->line = 32; | |
212 break; | |
213 case 0x85: | |
214 level2->sizekb = 2048; level2->assoc = 8; level2->line = 32; | |
215 break; | |
216 case 0x86: | |
217 level2->sizekb = 512; level2->assoc = 4; level2->line = 64; | |
218 break; | |
219 case 0x87: | |
220 level2->sizekb = 1024; level2->assoc = 8; level2->line = 64; | |
221 | |
222 default: | |
223 break; | |
224 } | |
225 } | |
226 | |
/* Detect cache parameters using CPUID function 2.

   The low four bits of EAX give the number of times the function has to
   be queried; every query returns four registers of one-byte cache
   descriptors.  A register whose top bit is set, or which is zero, is
   skipped.  The remaining registers are handed to decode_caches_intel,
   which updates *LEVEL1 and *LEVEL2.  XEON_MP is passed through
   unchanged.  */

static void
detect_caches_cpuid2 (bool xeon_mp,
                      struct cache_desc *level1, struct cache_desc *level2)
{
  unsigned regs[4];
  int nreps, i;

  __cpuid (2, regs[0], regs[1], regs[2], regs[3]);

  nreps = regs[0] & 0x0f;

  while (--nreps >= 0)
    {
      /* The iteration count bits in EAX are not a cache descriptor.
         Clear them after every query, not only the first one, so that
         they are never decoded as one.  */
      regs[0] &= ~0x0f;

      for (i = 0; i < 4; i++)
        if (regs[i] && !((regs[i] >> 31) & 1))
          decode_caches_intel (regs[i], xeon_mp, level1, level2);

      if (nreps)
        __cpuid (2, regs[0], regs[1], regs[2], regs[3]);
    }
}
251 | |
252 /* Detect cache parameters using CPUID function 4. This | |
253 method doesn't require hardcoded tables. */ | |
254 | |
/* Values of the cache type field (the low five bits of EAX) returned by
   CPUID function 4.  */
enum cache_type
{
  CACHE_END = 0,        /* No more caches to enumerate.  */
  CACHE_DATA = 1,       /* Data cache.  */
  CACHE_INST = 2,       /* Instruction cache.  */
  CACHE_UNIFIED = 3     /* Unified cache.  */
};
262 | |
263 static void | |
264 detect_caches_cpuid4 (struct cache_desc *level1, struct cache_desc *level2) | |
265 { | |
266 struct cache_desc *cache; | |
267 | |
268 unsigned eax, ebx, ecx, edx; | |
269 int count; | |
270 | |
271 for (count = 0;; count++) | |
272 { | |
273 __cpuid_count(4, count, eax, ebx, ecx, edx); | |
274 switch (eax & 0x1f) | |
275 { | |
276 case CACHE_END: | |
277 return; | |
278 case CACHE_DATA: | |
279 case CACHE_UNIFIED: | |
280 { | |
281 switch ((eax >> 5) & 0x07) | |
282 { | |
283 case 1: | |
284 cache = level1; | |
285 break; | |
286 case 2: | |
287 cache = level2; | |
288 break; | |
289 default: | |
290 cache = NULL; | |
291 } | |
292 | |
293 if (cache) | |
294 { | |
295 unsigned sets = ecx + 1; | |
296 unsigned part = ((ebx >> 12) & 0x03ff) + 1; | |
297 | |
298 cache->assoc = ((ebx >> 22) & 0x03ff) + 1; | |
299 cache->line = (ebx & 0x0fff) + 1; | |
300 | |
301 cache->sizekb = (cache->assoc * part | |
302 * cache->line * sets) / 1024; | |
303 } | |
304 } | |
305 default: | |
306 break; | |
307 } | |
308 } | |
309 } | |
310 | |
311 /* Returns the description of caches for an Intel processor. */ | |
312 | |
313 static const char * | |
314 detect_caches_intel (bool xeon_mp, unsigned max_level, unsigned max_ext_level) | |
315 { | |
316 struct cache_desc level1 = {0, 0, 0}, level2 = {0, 0, 0}; | |
317 | |
318 if (max_level >= 4) | |
319 detect_caches_cpuid4 (&level1, &level2); | |
320 else if (max_level >= 2) | |
321 detect_caches_cpuid2 (xeon_mp, &level1, &level2); | |
322 else | |
323 return ""; | |
324 | |
325 if (level1.sizekb == 0) | |
326 return ""; | |
327 | |
328 /* Intel CPUs are equipped with AMD style L2 cache info. Try this | |
329 method if other methods fail to provide L2 cache parameters. */ | |
330 if (level2.sizekb == 0 && max_ext_level >= 0x80000006) | |
331 detect_l2_cache (&level2); | |
332 | |
333 return describe_cache (level1, level2); | |
334 } | |
335 | |
/* Vendor signatures compared against the value that __get_cpuid_max
   stores through its second argument.  Each constant packs four
   characters of the vendor name, first character in the least
   significant byte.  */
enum vendor_signatures
{
  SIG_INTEL = 0x756e6547,       /* "Genu" */
  SIG_AMD   = 0x68747541,       /* "Auth" */
  SIG_GEODE = 0x646f6547        /* "Geod" */
};
342 | |
343 /* This will be called by the spec parser in gcc.c when it sees | |
344 a %:local_cpu_detect(args) construct. Currently it will be called | |
345 with either "arch" or "tune" as argument depending on if -march=native | |
346 or -mtune=native is to be substituted. | |
347 | |
348 It returns a string containing new command line parameters to be | |
349 put at the place of the above two options, depending on what CPU | |
350 this is executed. E.g. "-march=k8" on an AMD64 machine | |
351 for -march=native. | |
352 | |
353 ARGC and ARGV are set depending on the actual arguments given | |
354 in the spec. */ | |
355 | |
356 const char *host_detect_local_cpu (int argc, const char **argv) | |
357 { | |
358 enum processor_type processor = PROCESSOR_I386; | |
359 const char *cpu = "i386"; | |
360 | |
361 const char *cache = ""; | |
362 const char *options = ""; | |
363 | |
364 unsigned int eax, ebx, ecx, edx; | |
365 | |
366 unsigned int max_level, ext_level; | |
367 | |
368 unsigned int vendor; | |
369 unsigned int model, family; | |
370 | |
371 unsigned int has_sse3, has_ssse3, has_cmpxchg16b; | |
372 unsigned int has_cmpxchg8b, has_cmov, has_mmx, has_sse, has_sse2; | |
373 | |
374 /* Extended features */ | |
375 unsigned int has_lahf_lm = 0, has_sse4a = 0; | |
376 unsigned int has_longmode = 0, has_3dnowp = 0, has_3dnow = 0; | |
377 | |
378 bool arch; | |
379 | |
380 if (argc < 1) | |
381 return NULL; | |
382 | |
383 arch = !strcmp (argv[0], "arch"); | |
384 | |
385 if (!arch && strcmp (argv[0], "tune")) | |
386 return NULL; | |
387 | |
388 max_level = __get_cpuid_max (0, &vendor); | |
389 if (max_level < 1) | |
390 goto done; | |
391 | |
392 __cpuid (1, eax, ebx, ecx, edx); | |
393 | |
394 /* We don't care for extended family. */ | |
395 model = (eax >> 4) & 0x0f; | |
396 family = (eax >> 8) & 0x0f; | |
397 | |
398 has_sse3 = ecx & bit_SSE3; | |
399 has_ssse3 = ecx & bit_SSSE3; | |
400 has_cmpxchg16b = ecx & bit_CMPXCHG16B; | |
401 | |
402 has_cmpxchg8b = edx & bit_CMPXCHG8B; | |
403 has_cmov = edx & bit_CMOV; | |
404 has_mmx = edx & bit_MMX; | |
405 has_sse = edx & bit_SSE; | |
406 has_sse2 = edx & bit_SSE2; | |
407 | |
408 /* Check cpuid level of extended features. */ | |
409 __cpuid (0x80000000, ext_level, ebx, ecx, edx); | |
410 | |
411 if (ext_level > 0x80000000) | |
412 { | |
413 __cpuid (0x80000001, eax, ebx, ecx, edx); | |
414 | |
415 has_lahf_lm = ecx & bit_LAHF_LM; | |
416 has_sse4a = ecx & bit_SSE4a; | |
417 | |
418 has_longmode = edx & bit_LM; | |
419 has_3dnowp = edx & bit_3DNOWP; | |
420 has_3dnow = edx & bit_3DNOW; | |
421 } | |
422 | |
423 if (!arch) | |
424 { | |
425 if (vendor == SIG_AMD) | |
426 cache = detect_caches_amd (ext_level); | |
427 else if (vendor == SIG_INTEL) | |
428 { | |
429 bool xeon_mp = (family == 15 && model == 6); | |
430 cache = detect_caches_intel (xeon_mp, max_level, ext_level); | |
431 } | |
432 } | |
433 | |
434 if (vendor == SIG_AMD) | |
435 { | |
436 processor = PROCESSOR_PENTIUM; | |
437 | |
438 if (has_mmx) | |
439 processor = PROCESSOR_K6; | |
440 if (has_3dnowp) | |
441 processor = PROCESSOR_ATHLON; | |
442 if (has_sse2 || has_longmode) | |
443 processor = PROCESSOR_K8; | |
444 if (has_sse4a) | |
445 processor = PROCESSOR_AMDFAM10; | |
446 } | |
447 else if (vendor == SIG_GEODE) | |
448 processor = PROCESSOR_GEODE; | |
449 else | |
450 { | |
451 switch (family) | |
452 { | |
453 case 4: | |
454 processor = PROCESSOR_I486; | |
455 break; | |
456 case 5: | |
457 processor = PROCESSOR_PENTIUM; | |
458 break; | |
459 case 6: | |
460 processor = PROCESSOR_PENTIUMPRO; | |
461 break; | |
462 case 15: | |
463 processor = PROCESSOR_PENTIUM4; | |
464 break; | |
465 default: | |
466 /* We have no idea. */ | |
467 processor = PROCESSOR_GENERIC32; | |
468 } | |
469 } | |
470 | |
471 switch (processor) | |
472 { | |
473 case PROCESSOR_I386: | |
474 /* Default. */ | |
475 break; | |
476 case PROCESSOR_I486: | |
477 cpu = "i486"; | |
478 break; | |
479 case PROCESSOR_PENTIUM: | |
480 if (arch && has_mmx) | |
481 cpu = "pentium-mmx"; | |
482 else | |
483 cpu = "pentium"; | |
484 break; | |
485 case PROCESSOR_PENTIUMPRO: | |
486 if (has_longmode) | |
487 /* It is Core 2 Duo. */ | |
488 cpu = "core2"; | |
489 else if (arch) | |
490 { | |
491 if (has_sse3) | |
492 /* It is Core Duo. */ | |
493 cpu = "prescott"; | |
494 else if (has_sse2) | |
495 /* It is Pentium M. */ | |
496 cpu = "pentium-m"; | |
497 else if (has_sse) | |
498 /* It is Pentium III. */ | |
499 cpu = "pentium3"; | |
500 else if (has_mmx) | |
501 /* It is Pentium II. */ | |
502 cpu = "pentium2"; | |
503 else | |
504 /* Default to Pentium Pro. */ | |
505 cpu = "pentiumpro"; | |
506 } | |
507 else | |
508 /* For -mtune, we default to -mtune=generic. */ | |
509 cpu = "generic"; | |
510 break; | |
511 case PROCESSOR_PENTIUM4: | |
512 if (has_sse3) | |
513 { | |
514 if (has_longmode) | |
515 cpu = "nocona"; | |
516 else | |
517 cpu = "prescott"; | |
518 } | |
519 else | |
520 cpu = "pentium4"; | |
521 break; | |
522 case PROCESSOR_GEODE: | |
523 cpu = "geode"; | |
524 break; | |
525 case PROCESSOR_K6: | |
526 if (arch && has_3dnow) | |
527 cpu = "k6-3"; | |
528 else | |
529 cpu = "k6"; | |
530 break; | |
531 case PROCESSOR_ATHLON: | |
532 if (arch && has_sse) | |
533 cpu = "athlon-4"; | |
534 else | |
535 cpu = "athlon"; | |
536 break; | |
537 case PROCESSOR_K8: | |
538 if (arch && has_sse3) | |
539 cpu = "k8-sse3"; | |
540 else | |
541 cpu = "k8"; | |
542 break; | |
543 case PROCESSOR_AMDFAM10: | |
544 cpu = "amdfam10"; | |
545 break; | |
546 | |
547 default: | |
548 /* Use something reasonable. */ | |
549 if (arch) | |
550 { | |
551 if (has_ssse3) | |
552 cpu = "core2"; | |
553 else if (has_sse3) | |
554 { | |
555 if (has_longmode) | |
556 cpu = "nocona"; | |
557 else | |
558 cpu = "prescott"; | |
559 } | |
560 else if (has_sse2) | |
561 cpu = "pentium4"; | |
562 else if (has_cmov) | |
563 cpu = "pentiumpro"; | |
564 else if (has_mmx) | |
565 cpu = "pentium-mmx"; | |
566 else if (has_cmpxchg8b) | |
567 cpu = "pentium"; | |
568 } | |
569 else | |
570 cpu = "generic"; | |
571 } | |
572 | |
573 if (arch) | |
574 { | |
575 if (has_cmpxchg16b) | |
576 options = concat (options, "-mcx16 ", NULL); | |
577 if (has_lahf_lm) | |
578 options = concat (options, "-msahf ", NULL); | |
579 } | |
580 | |
581 done: | |
582 return concat (cache, "-m", argv[0], "=", cpu, " ", options, NULL); | |
583 } | |
584 #else | |
585 | |
/* If we aren't compiling with GCC we just provide a minimal
   default value.

   Returns NULL when no argument was given or when ARGV[0] is neither
   "arch" nor "tune"; otherwise returns "-march=i386" or
   "-mtune=generic" respectively.  */

const char *host_detect_local_cpu (int argc, const char **argv)
{
  const char *kind;

  if (argc < 1)
    return NULL;

  kind = argv[0];

  if (strcmp (kind, "arch") == 0)
    {
      /* FIXME: i386 is wrong for 64bit compiler.  How can we tell if
         we are generating 64bit or 32bit code?  */
      return concat ("-m", kind, "=", "i386", NULL);
    }

  if (strcmp (kind, "tune") == 0)
    return concat ("-m", kind, "=", "generic", NULL);

  return NULL;
}
613 #endif /* __GNUC__ */ |