# vim:ts=4
#
# Copyright (c) 2007 Hypertriton, Inc.
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# 1. Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
# USE OF THIS SOFTWARE EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#
# Test($ver)
#
# Probe the C compiler for SSE, SSE2 and SSE3 support, in that order.
#
# For each extension the routine:
#   1. defines <EXT>_CFLAGS to the matching compiler switch
#      (-msse, -msse2, -msse3);
#   2. hands a small C program to MkCompileAndRunC(), which is given the
#      name of the result variable (HAVE_SSE, HAVE_SSE2, HAVE_SSE3) and the
#      flags '${CFLAGS} ${<EXT>_CFLAGS}';
#   3. if the result variable equals "yes", saves <EXT>_CFLAGS with
#      MkSaveDefine(); otherwise calls MkSaveUndef() and resets
#      <EXT>_CFLAGS to the empty string;
#   4. calls MkSaveMK() on <EXT>_CFLAGS.
#
# The Mk* helpers are defined elsewhere; presumably they emit the
# corresponding configure-script fragments -- confirm against the framework.
#
# Parameters:
#   $ver - accepted for interface compatibility; not used by this test.
#
# Returns: 0.
#
sub Test {
	my ($ver) = @_;

	#
	# SSE: scale a handful of known vectors with _mm_mul_ps() and compare
	# each component against precomputed results within MAXERR.
	# No MkPrintN() here, unlike the SSE2/SSE3 probes below.
	#
	MkDefine('SSE_CFLAGS', '-msse');
	MkCompileAndRunC('HAVE_SSE', '${CFLAGS} ${SSE_CFLAGS}', '', << 'EOF');
#include <xmmintrin.h>
#include <stdio.h>

#define MAXERR 1e-4

typedef union vec {
	float v[4];
	__m128 m128;
	struct { float x, y, z, pad; };
} Vector __attribute__ ((aligned(16)));

const float testVals[4][7] = {
	{ 0.076003,0.559770,0.163680, 1.0,     0.076003,0.559770,0.163680 },
	{ 0.076003,0.559770,0.163680, 0.20485, 0.015569,0.114667,0.033529 },
	{ 0.668390,0.929890,0.382710, 1.0,     0.668390,0.929890,0.382710 },
	{ 0.668390,0.929890,0.382710, 0.95831, 0.640530,0.891120,0.366760 },
};

static Vector
Scale(Vector a, float c)
{
	Vector b;
	__m128 v;

	v = _mm_set1_ps(c);
	b.m128 = _mm_mul_ps(a.m128, v);
	return (b);
}

int
main(int argc, char *argv[])
{
	Vector a, b;
	float dx, dy, dz;
	int i, j;

	for (i = 0; i < 10000; i++) {
		for (j = 0; j < 4; j++) {
			a.x = testVals[j][0];
			a.y = testVals[j][1];
			a.z = testVals[j][2];
			b = Scale(a, testVals[j][3]);
			dx = b.x - testVals[j][4];
			dy = b.y - testVals[j][5];
			dz = b.z - testVals[j][6];
			if ((dx > 0.0 && dx > MAXERR) ||
			    (dx < 0.0 && dx < -MAXERR) ||
			    (dy > 0.0 && dy > MAXERR) ||
			    (dy < 0.0 && dy < -MAXERR) ||
			    (dz > 0.0 && dz > MAXERR) ||
			    (dz < 0.0 && dz < -MAXERR)) {
				printf("results inaccurate [%f,%f,%f]\n",
				    dx, dy, dz);
				return (1);
			}
		}
	}
	return (0);
}
EOF
	MkIf('"${HAVE_SSE}" = "yes"');
		MkSaveDefine('SSE_CFLAGS');
	MkElse();
		MkSaveUndef('SSE_CFLAGS');
		MkDefine('SSE_CFLAGS', '');
	MkEndif();
	MkSaveMK('SSE_CFLAGS');

	#
	# SSE2: exercise the double-precision intrinsics (load, xor, store).
	# Only successful compilation and execution matter; the computed value
	# is not inspected.
	#
	MkPrintN('checking for SSE2 extensions...');
	MkDefine('SSE2_CFLAGS', '-msse2');
	MkCompileAndRunC('HAVE_SSE2', '${CFLAGS} ${SSE2_CFLAGS}', '', << 'EOF');
#include <emmintrin.h>

int
main(int argc, char *argv[])
{
	double a[4] __attribute__ ((aligned(16)));
	double b[4] __attribute__ ((aligned(16)));
	double rv;
	__m128d vec1, vec2;

	vec1 = _mm_load_pd(a);
	vec2 = _mm_load_pd(b);
	vec1 = _mm_xor_pd(vec1, vec2);
	_mm_store_sd(&rv, vec1);
	return (0);
}
EOF
	MkIf('"${HAVE_SSE2}" = "yes"');
		MkSaveDefine('SSE2_CFLAGS');
	MkElse();
		MkSaveUndef('SSE2_CFLAGS');
		MkDefine('SSE2_CFLAGS', '');
	MkEndif();
	MkSaveMK('SSE2_CFLAGS');

	#
	# SSE3: exercise the horizontal-add intrinsic _mm_hadd_ps().
	# As above, the computed value is not inspected.
	#
	MkPrintN('checking for SSE3 extensions...');
	MkDefine('SSE3_CFLAGS', '-msse3');
	MkCompileAndRunC('HAVE_SSE3', '${CFLAGS} ${SSE3_CFLAGS}', '', << 'EOF');
#include <pmmintrin.h>

int
main(int argc, char *argv[])
{
	float a[4] __attribute__ ((aligned(16)));
	float b[4] __attribute__ ((aligned(16)));
	__m128 vec1, vec2;
	float rv;

	vec1 = _mm_load_ps(a);
	vec2 = _mm_load_ps(b);
	vec1 = _mm_mul_ps(vec1, vec2);
	vec1 = _mm_hadd_ps(vec1, vec1);
	vec1 = _mm_hadd_ps(vec1, vec1);
	_mm_store_ss(&rv, vec1);
	return (0);
}
EOF
	MkIf('"${HAVE_SSE3}" = "yes"');
		MkSaveDefine('SSE3_CFLAGS');
	MkElse();
		MkSaveUndef('SSE3_CFLAGS');
		MkDefine('SSE3_CFLAGS', '');
	MkEndif();
	MkSaveMK('SSE3_CFLAGS');

	return (0);
}

# Register this module under the key 'sse': its description, its test
# routine, and its dependency on the 'cc' module.
BEGIN
{
	$DESCR{'sse'} = 'SSE extensions';
	$TESTS{'sse'} = \&Test;
	$DEPS{'sse'} = 'cc';
}

;1