O0のときにvolatileの方が速い理由がわからん。
#include <cstdio>
#include <ctime>
#include <sys/time.h>
#include <typeinfo>

/**
 * Micro-benchmark: copies one local of type T into another N times and
 * reports how long the loop took.
 *
 * Writes three lines to stdout:
 *   1. the implementation-defined name of T (typeid ignores top-level
 *      cv-qualifiers, so T and volatile T print the same name),
 *   2. the elapsed time in microseconds as measured by gettimeofday(),
 *   3. the average elapsed time per iteration.
 *
 * @tparam T  type to copy; must be initialisable from 0 and may be
 *            volatile-qualified.
 * @tparam N  number of loop iterations.
 */
template <typename T, int N>
void func() {
  T src = 0;
  T dst;
  int iter;
  struct timeval started, finished;

  gettimeofday(&started, NULL);
  iter = 0;
  while (iter < N) {
    // The copied value is never read afterwards; the copy itself is the
    // operation being timed.
    dst = src;
    iter++;
  }
  gettimeofday(&finished, NULL);

  // Seconds and microseconds are folded into one microsecond count, then
  // narrowed to int to match the %d conversion below.
  int elapsed_usec = (finished.tv_sec - started.tv_sec) * 1000000 +
                     (finished.tv_usec - started.tv_usec);

  printf("%s\n", typeid(T).name());
  printf("%d usec\n", elapsed_usec);
  printf("%.5lf usec/once\n", static_cast<double>(elapsed_usec) / N);
}

/// Runs the benchmark for T first and then for volatile T.
template <typename T, int N>
void run_plain_then_volatile() {
  func<T, N>();
  func<volatile T, N>();
}

/// Benchmarks int, long, float and double, each as plain then volatile.
int main() {
  const int kIterations = 10000000;

  run_plain_then_volatile<int, kIterations>();
  run_plain_then_volatile<long, kIterations>();
  run_plain_then_volatile<float, kIterations>();
  run_plain_then_volatile<double, kIterations>();
}
> g++ -O0 dataread_exp.cc
> ./a.out
i
81954 usec
0.00820 usec/once
i
64415 usec
0.00644 usec/once
l
72789 usec
0.00728 usec/once
l
56349 usec
0.00563 usec/once
f
72931 usec
0.00729 usec/once
f
40693 usec
0.00407 usec/once
d
71768 usec
0.00718 usec/once
d
60869 usec
0.00609 usec/once
> g++ -O1 dataread_exp.cc
> ./a.out
i
2 usec
0.00000 usec/once
i
30583 usec
0.00306 usec/once
l
1 usec
0.00000 usec/once
l
30142 usec
0.00301 usec/once
f
2 usec
0.00000 usec/once
f
30324 usec
0.00303 usec/once
d
1 usec
0.00000 usec/once
d
30476 usec
0.00305 usec/once
O0 時のobjdump -dの結果。慣れないなりに読んでみた限りでは違いはないよーに見えるんだが……
080485be <_Z4funcIiLi10000000EEvv>:
 80485be:	55                   	push %ebp
 80485bf:	89 e5                	mov %esp,%ebp
 80485c1:	53                   	push %ebx
 80485c2:	83 ec 34             	sub $0x34,%esp
 80485c5:	c7 45 ec 00 00 00 00 	movl $0x0,-0x14(%ebp)
 80485cc:	c7 44 24 04 00 00 00 	movl $0x0,0x4(%esp)
 80485d3:	00
 80485d4:	8d 45 e4             	lea -0x1c(%ebp),%eax
 80485d7:	89 04 24             	mov %eax,(%esp)
 80485da:	e8 c1 fe ff ff       	call 80484a0 <gettimeofday@plt>
 80485df:	c7 45 f4 00 00 00 00 	movl $0x0,-0xc(%ebp)
 80485e6:	eb 0a                	jmp 80485f2 <_Z4funcIiLi10000000EEvv+0x34>
 80485e8:	8b 45 ec             	mov -0x14(%ebp),%eax
 80485eb:	89 45 f0             	mov %eax,-0x10(%ebp)
 80485ee:	83 45 f4 01          	addl $0x1,-0xc(%ebp)
 80485f2:	81 7d f4 7f 96 98 00 	cmpl $0x98967f,-0xc(%ebp)
 80485f9:	7e ed                	jle 80485e8 <_Z4funcIiLi10000000EEvv+0x2a>
 80485fb:	c7 44 24 04 00 00 00 	movl $0x0,0x4(%esp)
 8048602:	00
 8048603:	8d 45 dc             	lea -0x24(%ebp),%eax
 8048606:	89 04 24             	mov %eax,(%esp)
 8048609:	e8 92 fe ff ff       	call 80484a0 <gettimeofday@plt>
 ...

0804867c <_Z4funcIViLi10000000EEvv>:
 804867c:	55                   	push %ebp
 804867d:	89 e5                	mov %esp,%ebp
 804867f:	53                   	push %ebx
 8048680:	83 ec 34             	sub $0x34,%esp
 8048683:	c7 45 f0 00 00 00 00 	movl $0x0,-0x10(%ebp)
 804868a:	c7 44 24 04 00 00 00 	movl $0x0,0x4(%esp)
 8048691:	00
 8048692:	8d 45 e4             	lea -0x1c(%ebp),%eax
 8048695:	89 04 24             	mov %eax,(%esp)
 8048698:	e8 03 fe ff ff       	call 80484a0 <gettimeofday@plt>
 804869d:	c7 45 f4 00 00 00 00 	movl $0x0,-0xc(%ebp)
 80486a4:	eb 0a                	jmp 80486b0 <_Z4funcIViLi10000000EEvv+0x34>
 80486a6:	8b 45 f0             	mov -0x10(%ebp),%eax
 80486a9:	89 45 ec             	mov %eax,-0x14(%ebp)
 80486ac:	83 45 f4 01          	addl $0x1,-0xc(%ebp)
 80486b0:	81 7d f4 7f 96 98 00 	cmpl $0x98967f,-0xc(%ebp)
 80486b7:	7e ed                	jle 80486a6 <_Z4funcIViLi10000000EEvv+0x2a>
 80486b9:	c7 44 24 04 00 00 00 	movl $0x0,0x4(%esp)
 80486c0:	00
 80486c1:	8d 45 dc             	lea -0x24(%ebp),%eax
 80486c4:	89 04 24             	mov %eax,(%esp)
 80486c7:	e8 d4 fd ff ff       	call 80484a0 <gettimeofday@plt>
 ...