Next: , Previous: FR-V Built-in Functions, Up: Target Builtins


5.51.5 X86 Built-in Functions

These built-in functions are available for the i386 and x86-64 family of computers, depending on the command-line switches used.

Note that, if you specify command-line switches such as -msse, the compiler could use the extended instruction sets even if the built-ins are not used explicitly in the program. For this reason, applications which perform runtime CPU detection must compile separate files for each supported architecture, using the appropriate flags. In particular, the file containing the CPU detection code should be compiled without these options.

The following machine modes are available for use with MMX built-in functions (see Vector Extensions): V2SI for a vector of two 32-bit integers, V4HI for a vector of four 16-bit integers, and V8QI for a vector of eight 8-bit integers. Some of the built-in functions operate on MMX registers as a whole 64-bit entity, these use V1DI as their mode.

If 3Dnow extensions are enabled, V2SF is used as a mode for a vector of two 32-bit floating point values.

If SSE extensions are enabled, V4SF is used for a vector of four 32-bit floating point values. Some instructions use a vector of four 32-bit integers, these use V4SI. Finally, some instructions operate on an entire vector register, interpreting it as a 128-bit integer, these use mode TI.

The following built-in functions are made available by -mmmx. All of them generate the machine instruction that is part of the name.

     v8qi __builtin_ia32_paddb (v8qi, v8qi)
     v4hi __builtin_ia32_paddw (v4hi, v4hi)
     v2si __builtin_ia32_paddd (v2si, v2si)
     
     v1di __builtin_ia32_paddq (v1di, v1di)
     
     v8qi __builtin_ia32_psubb (v8qi, v8qi)
     v4hi __builtin_ia32_psubw (v4hi, v4hi)
     v2si __builtin_ia32_psubd (v2si, v2si)
     
     v1di __builtin_ia32_psubq (v1di, v1di)
     
     v8qi __builtin_ia32_paddsb (v8qi, v8qi)
     v4hi __builtin_ia32_paddsw (v4hi, v4hi)
     v8qi __builtin_ia32_psubsb (v8qi, v8qi)
     v4hi __builtin_ia32_psubsw (v4hi, v4hi)
     v8qi __builtin_ia32_paddusb (v8qi, v8qi)
     v4hi __builtin_ia32_paddusw (v4hi, v4hi)
     v8qi __builtin_ia32_psubusb (v8qi, v8qi)
     v4hi __builtin_ia32_psubusw (v4hi, v4hi)
     v4hi __builtin_ia32_pmullw (v4hi, v4hi)
     v4hi __builtin_ia32_pmulhw (v4hi, v4hi)
     v1di __builtin_ia32_pand (v1di, v1di)
     v1di __builtin_ia32_pandn (v1di,v1di)
     v1di __builtin_ia32_por (v1di, v1di)
     v1di __builtin_ia32_pxor (v1di, v1di)
     v8qi __builtin_ia32_pcmpeqb (v8qi, v8qi)
     v4hi __builtin_ia32_pcmpeqw (v4hi, v4hi)
     v2si __builtin_ia32_pcmpeqd (v2si, v2si)
     v8qi __builtin_ia32_pcmpgtb (v8qi, v8qi)
     v4hi __builtin_ia32_pcmpgtw (v4hi, v4hi)
     v2si __builtin_ia32_pcmpgtd (v2si, v2si)
     v8qi __builtin_ia32_punpckhbw (v8qi, v8qi)
     v4hi __builtin_ia32_punpckhwd (v4hi, v4hi)
     v2si __builtin_ia32_punpckhdq (v2si, v2si)
     v8qi __builtin_ia32_punpcklbw (v8qi, v8qi)
     v4hi __builtin_ia32_punpcklwd (v4hi, v4hi)
     v2si __builtin_ia32_punpckldq (v2si, v2si)
     v8qi __builtin_ia32_packsswb (v4hi, v4hi)
     v4hi __builtin_ia32_packssdw (v2si, v2si)
     v8qi __builtin_ia32_packuswb (v4hi, v4hi)
     
     void __builtin_ia32_emms (void)
     v4hi __builtin_ia32_psllw (v4hi, v1di)
     v4hi __builtin_ia32_psllwi (v4hi, int)
     v2si __builtin_ia32_pslld (v2si, v1di)
     v2si __builtin_ia32_pslldi (v2si, int)
     v1di __builtin_ia32_psllq (v1di, v1di)
     
     v1di __builtin_ia32_psllqi (v1di, int)
     v4hi __builtin_ia32_psrlw (v4hi, v1di)
     v4hi __builtin_ia32_psrlwi (v4hi, int)
     v2si __builtin_ia32_psrld (v2si, v1di)
     v2si __builtin_ia32_psrldi (v2si, int)
     v1di __builtin_ia32_psrlq (v1di, v1di)
     
     v1di __builtin_ia32_psrlqi (v1di, int)
     v4hi __builtin_ia32_psraw (v4hi, v1di)
     v4hi __builtin_ia32_psrawi (v4hi, int)
     v2si __builtin_ia32_psrad (v2si, v1di)
     v2si __builtin_ia32_psradi (v2si, int)
     v4hi __builtin_ia32_pshufw (v4hi, int)
     v2si __builtin_ia32_pmaddwd (v4hi, v4hi)
     v2si __builtin_ia32_vec_init_v2si (int, int)
     v4hi __builtin_ia32_vec_init_v4hi (short, short, short, short)
     v8qi __builtin_ia32_vec_init_v8qi (char, char, char, char, char, char, char, char)
     int __builtin_ia32_vec_ext_v2si (v2si, int)
     

The following built-in functions are made available either with -msse, or with a combination of -m3dnow and -march=athlon. All of them generate the machine instruction that is part of the name.

     v4hi __builtin_ia32_pmulhuw (v4hi, v4hi)
     v8qi __builtin_ia32_pavgb (v8qi, v8qi)
     v4hi __builtin_ia32_pavgw (v4hi, v4hi)
     v4hi __builtin_ia32_psadbw (v8qi, v8qi)
     v8qi __builtin_ia32_pmaxub (v8qi, v8qi)
     v4hi __builtin_ia32_pmaxsw (v4hi, v4hi)
     v8qi __builtin_ia32_pminub (v8qi, v8qi)
     v4hi __builtin_ia32_pminsw (v4hi, v4hi)
     int __builtin_ia32_pextrw (v4hi, int)
     v4hi __builtin_ia32_pinsrw (v4hi, int, int)
     int __builtin_ia32_pmovmskb (v8qi)
     void __builtin_ia32_maskmovq (v8qi, v8qi, char *)
     void __builtin_ia32_movntq (v1di *, v1di)
     void __builtin_ia32_sfence (void)
     
     int __builtin_ia32_vec_ext_v4hi (v4hi, int)
     v4hi __builtin_ia32_vec_set_v4hi (v4hi, int, int)
     

The following built-in functions are available when -msse is used. All of them generate the machine instruction that is part of the name.

     int __builtin_ia32_comieq (v4sf, v4sf)
     int __builtin_ia32_comineq (v4sf, v4sf)
     int __builtin_ia32_comilt (v4sf, v4sf)
     int __builtin_ia32_comile (v4sf, v4sf)
     int __builtin_ia32_comigt (v4sf, v4sf)
     int __builtin_ia32_comige (v4sf, v4sf)
     int __builtin_ia32_ucomieq (v4sf, v4sf)
     int __builtin_ia32_ucomineq (v4sf, v4sf)
     int __builtin_ia32_ucomilt (v4sf, v4sf)
     int __builtin_ia32_ucomile (v4sf, v4sf)
     int __builtin_ia32_ucomigt (v4sf, v4sf)
     int __builtin_ia32_ucomige (v4sf, v4sf)
     v4sf __builtin_ia32_addps (v4sf, v4sf)
     v4sf __builtin_ia32_subps (v4sf, v4sf)
     v4sf __builtin_ia32_mulps (v4sf, v4sf)
     v4sf __builtin_ia32_divps (v4sf, v4sf)
     v4sf __builtin_ia32_addss (v4sf, v4sf)
     v4sf __builtin_ia32_subss (v4sf, v4sf)
     v4sf __builtin_ia32_mulss (v4sf, v4sf)
     v4sf __builtin_ia32_divss (v4sf, v4sf)
     v4si __builtin_ia32_cmpeqps (v4sf, v4sf)
     v4si __builtin_ia32_cmpltps (v4sf, v4sf)
     v4si __builtin_ia32_cmpleps (v4sf, v4sf)
     v4si __builtin_ia32_cmpgtps (v4sf, v4sf)
     v4si __builtin_ia32_cmpgeps (v4sf, v4sf)
     v4si __builtin_ia32_cmpunordps (v4sf, v4sf)
     v4si __builtin_ia32_cmpneqps (v4sf, v4sf)
     v4si __builtin_ia32_cmpnltps (v4sf, v4sf)
     v4si __builtin_ia32_cmpnleps (v4sf, v4sf)
     v4si __builtin_ia32_cmpngtps (v4sf, v4sf)
     v4si __builtin_ia32_cmpngeps (v4sf, v4sf)
     v4si __builtin_ia32_cmpordps (v4sf, v4sf)
     v4si __builtin_ia32_cmpeqss (v4sf, v4sf)
     v4si __builtin_ia32_cmpltss (v4sf, v4sf)
     v4si __builtin_ia32_cmpless (v4sf, v4sf)
     v4si __builtin_ia32_cmpunordss (v4sf, v4sf)
     v4si __builtin_ia32_cmpneqss (v4sf, v4sf)
     v4si __builtin_ia32_cmpnlts (v4sf, v4sf)
     v4si __builtin_ia32_cmpnless (v4sf, v4sf)
     v4si __builtin_ia32_cmpordss (v4sf, v4sf)
     v4sf __builtin_ia32_maxps (v4sf, v4sf)
     v4sf __builtin_ia32_maxss (v4sf, v4sf)
     v4sf __builtin_ia32_minps (v4sf, v4sf)
     v4sf __builtin_ia32_minss (v4sf, v4sf)
     v4sf __builtin_ia32_andps (v4sf, v4sf)
     v4sf __builtin_ia32_andnps (v4sf, v4sf)
     v4sf __builtin_ia32_orps (v4sf, v4sf)
     v4sf __builtin_ia32_xorps (v4sf, v4sf)
     v4sf __builtin_ia32_movss (v4sf, v4sf)
     v4sf __builtin_ia32_movhlps (v4sf, v4sf)
     v4sf __builtin_ia32_movlhps (v4sf, v4sf)
     v4sf __builtin_ia32_unpckhps (v4sf, v4sf)
     v4sf __builtin_ia32_unpcklps (v4sf, v4sf)
     v4sf __builtin_ia32_cvtpi2ps (v4sf, v2si)
     v4sf __builtin_ia32_cvtsi2ss (v4sf, int)
     v2si __builtin_ia32_cvtps2pi (v4sf)
     int __builtin_ia32_cvtss2si (v4sf)
     v2si __builtin_ia32_cvttps2pi (v4sf)
     int __builtin_ia32_cvttss2si (v4sf)
     v4sf __builtin_ia32_rcpps (v4sf)
     v4sf __builtin_ia32_rsqrtps (v4sf)
     v4sf __builtin_ia32_sqrtps (v4sf)
     v4sf __builtin_ia32_rcpss (v4sf)
     v4sf __builtin_ia32_rsqrtss (v4sf)
     v4sf __builtin_ia32_sqrtss (v4sf)
     v4sf __builtin_ia32_shufps (v4sf, v4sf, int)
     void __builtin_ia32_movntps (float *, v4sf)
     int __builtin_ia32_movmskps (v4sf)
     
     void __builtin_ia32_ldmxcsr (unsigned)
     unsigned __builtin_ia32_stmxcsr (void)
     v2df __builtin_ia32_vec_ext_v2df (v2df, int)
     v2di __builtin_ia32_vec_ext_v2di (v2di, int)
     v4sf __builtin_ia32_vec_ext_v4sf (v4sf, int)
     v4si __builtin_ia32_vec_ext_v4si (v4si, int)
     v8hi __builtin_ia32_vec_set_v8hi (v8hi, int, int)
     unsigned int __builtin_ia32_vec_ext_v8hi (v8hi, int)
     

The following built-in functions are available when -msse is used.

v4sf __builtin_ia32_loadaps (float *)
Generates the movaps machine instruction as a load from memory.
void __builtin_ia32_storeaps (float *, v4sf)
Generates the movaps machine instruction as a store to memory.
v4sf __builtin_ia32_loadups (float *)
Generates the movups machine instruction as a load from memory.
void __builtin_ia32_storeups (float *, v4sf)
Generates the movups machine instruction as a store to memory.
v4sf __builtin_ia32_loadsss (float *)
Generates the movss machine instruction as a load from memory.
void __builtin_ia32_storess (float *, v4sf)
Generates the movss machine instruction as a store to memory.
v4sf __builtin_ia32_loadhps (v4sf, v2si *)
Generates the movhps machine instruction as a load from memory.
v4sf __builtin_ia32_loadlps (v4sf, v2si *)
Generates the movlps machine instruction as a load from memory
void __builtin_ia32_storehps (v4sf, v2si *)
Generates the movhps machine instruction as a store to memory.
void __builtin_ia32_storelps (v4sf, v2si *)
Generates the movlps machine instruction as a store to memory.

The following built-in functions are available when -msse2 is used. All of them generate the machine instruction that is part of the name.

     int __builtin_ia32_comisdeq (v2df, v2df)
     int __builtin_ia32_comisdlt (v2df, v2df)
     int __builtin_ia32_comisdle (v2df, v2df)
     int __builtin_ia32_comisdgt (v2df, v2df)
     int __builtin_ia32_comisdge (v2df, v2df)
     int __builtin_ia32_comisdneq (v2df, v2df)
     int __builtin_ia32_ucomisdeq (v2df, v2df)
     int __builtin_ia32_ucomisdlt (v2df, v2df)
     int __builtin_ia32_ucomisdle (v2df, v2df)
     int __builtin_ia32_ucomisdgt (v2df, v2df)
     int __builtin_ia32_ucomisdge (v2df, v2df)
     int __builtin_ia32_ucomisdneq (v2df, v2df)
     v2df __builtin_ia32_cmpeqpd (v2df, v2df)
     v2df __builtin_ia32_cmpltpd (v2df, v2df)
     v2df __builtin_ia32_cmplepd (v2df, v2df)
     v2df __builtin_ia32_cmpgtpd (v2df, v2df)
     v2df __builtin_ia32_cmpgepd (v2df, v2df)
     v2df __builtin_ia32_cmpunordpd (v2df, v2df)
     v2df __builtin_ia32_cmpneqpd (v2df, v2df)
     v2df __builtin_ia32_cmpnltpd (v2df, v2df)
     v2df __builtin_ia32_cmpnlepd (v2df, v2df)
     v2df __builtin_ia32_cmpngtpd (v2df, v2df)
     v2df __builtin_ia32_cmpngepd (v2df, v2df)
     v2df __builtin_ia32_cmpordpd (v2df, v2df)
     v2df __builtin_ia32_cmpeqsd (v2df, v2df)
     v2df __builtin_ia32_cmpltsd (v2df, v2df)
     v2df __builtin_ia32_cmplesd (v2df, v2df)
     v2df __builtin_ia32_cmpunordsd (v2df, v2df)
     v2df __builtin_ia32_cmpneqsd (v2df, v2df)
     v2df __builtin_ia32_cmpnltsd (v2df, v2df)
     v2df __builtin_ia32_cmpnlesd (v2df, v2df)
     v2df __builtin_ia32_cmpordsd (v2df, v2df)
     v2di __builtin_ia32_paddq (v2di, v2di)
     v2di __builtin_ia32_psubq (v2di, v2di)
     v2df __builtin_ia32_addpd (v2df, v2df)
     v2df __builtin_ia32_subpd (v2df, v2df)
     v2df __builtin_ia32_mulpd (v2df, v2df)
     v2df __builtin_ia32_divpd (v2df, v2df)
     v2df __builtin_ia32_addsd (v2df, v2df)
     v2df __builtin_ia32_subsd (v2df, v2df)
     v2df __builtin_ia32_mulsd (v2df, v2df)
     v2df __builtin_ia32_divsd (v2df, v2df)
     v2df __builtin_ia32_minpd (v2df, v2df)
     v2df __builtin_ia32_maxpd (v2df, v2df)
     v2df __builtin_ia32_minsd (v2df, v2df)
     v2df __builtin_ia32_maxsd (v2df, v2df)
     v2df __builtin_ia32_andpd (v2df, v2df)
     v2df __builtin_ia32_andnpd (v2df, v2df)
     v2df __builtin_ia32_orpd (v2df, v2df)
     v2df __builtin_ia32_xorpd (v2df, v2df)
     v2df __builtin_ia32_movsd (v2df, v2df)
     v2df __builtin_ia32_unpckhpd (v2df, v2df)
     v2df __builtin_ia32_unpcklpd (v2df, v2df)
     v16qi __builtin_ia32_paddb128 (v16qi, v16qi)
     v8hi __builtin_ia32_paddw128 (v8hi, v8hi)
     v4si __builtin_ia32_paddd128 (v4si, v4si)
     v2di __builtin_ia32_paddq128 (v2di, v2di)
     v16qi __builtin_ia32_psubb128 (v16qi, v16qi)
     v8hi __builtin_ia32_psubw128 (v8hi, v8hi)
     v4si __builtin_ia32_psubd128 (v4si, v4si)
     v2di __builtin_ia32_psubq128 (v2di, v2di)
     v8hi __builtin_ia32_pmullw128 (v8hi, v8hi)
     v8hi __builtin_ia32_pmulhw128 (v8hi, v8hi)
     v2di __builtin_ia32_pand128 (v2di, v2di)
     v2di __builtin_ia32_pandn128 (v2di, v2di)
     v2di __builtin_ia32_por128 (v2di, v2di)
     v2di __builtin_ia32_pxor128 (v2di, v2di)
     v16qi __builtin_ia32_pavgb128 (v16qi, v16qi)
     v8hi __builtin_ia32_pavgw128 (v8hi, v8hi)
     v16qi __builtin_ia32_pcmpeqb128 (v16qi, v16qi)
     v8hi __builtin_ia32_pcmpeqw128 (v8hi, v8hi)
     v4si __builtin_ia32_pcmpeqd128 (v4si, v4si)
     v16qi __builtin_ia32_pcmpgtb128 (v16qi, v16qi)
     v8hi __builtin_ia32_pcmpgtw128 (v8hi, v8hi)
     v4si __builtin_ia32_pcmpgtd128 (v4si, v4si)
     v16qi __builtin_ia32_pmaxub128 (v16qi, v16qi)
     v8hi __builtin_ia32_pmaxsw128 (v8hi, v8hi)
     v16qi __builtin_ia32_pminub128 (v16qi, v16qi)
     v8hi __builtin_ia32_pminsw128 (v8hi, v8hi)
     v16qi __builtin_ia32_punpckhbw128 (v16qi, v16qi)
     v8hi __builtin_ia32_punpckhwd128 (v8hi, v8hi)
     v4si __builtin_ia32_punpckhdq128 (v4si, v4si)
     v2di __builtin_ia32_punpckhqdq128 (v2di, v2di)
     v16qi __builtin_ia32_punpcklbw128 (v16qi, v16qi)
     v8hi __builtin_ia32_punpcklwd128 (v8hi, v8hi)
     v4si __builtin_ia32_punpckldq128 (v4si, v4si)
     v2di __builtin_ia32_punpcklqdq128 (v2di, v2di)
     v16qi __builtin_ia32_packsswb128 (v16qi, v16qi)
     v8hi __builtin_ia32_packssdw128 (v8hi, v8hi)
     v16qi __builtin_ia32_packuswb128 (v16qi, v16qi)
     v8hi __builtin_ia32_pmulhuw128 (v8hi, v8hi)
     void __builtin_ia32_maskmovdqu (v16qi, v16qi)
     v2df __builtin_ia32_loadupd (double *)
     void __builtin_ia32_storeupd (double *, v2df)
     v2df __builtin_ia32_loadhpd (v2df, double *)
     v2df __builtin_ia32_loadlpd (v2df, double *)
     int __builtin_ia32_movmskpd (v2df)
     int __builtin_ia32_pmovmskb128 (v16qi)
     void __builtin_ia32_movnti (int *, int)
     void __builtin_ia32_movntpd (double *, v2df)
     void __builtin_ia32_movntdq (v2df *, v2df)
     v4si __builtin_ia32_pshufd (v4si, int)
     v8hi __builtin_ia32_pshuflw (v8hi, int)
     v8hi __builtin_ia32_pshufhw (v8hi, int)
     v2di __builtin_ia32_psadbw128 (v16qi, v16qi)
     v2df __builtin_ia32_sqrtpd (v2df)
     v2df __builtin_ia32_sqrtsd (v2df)
     v2df __builtin_ia32_shufpd (v2df, v2df, int)
     v2df __builtin_ia32_cvtdq2pd (v4si)
     v4sf __builtin_ia32_cvtdq2ps (v4si)
     v4si __builtin_ia32_cvtpd2dq (v2df)
     v2si __builtin_ia32_cvtpd2pi (v2df)
     v4sf __builtin_ia32_cvtpd2ps (v2df)
     v4si __builtin_ia32_cvttpd2dq (v2df)
     v2si __builtin_ia32_cvttpd2pi (v2df)
     v2df __builtin_ia32_cvtpi2pd (v2si)
     int __builtin_ia32_cvtsd2si (v2df)
     int __builtin_ia32_cvttsd2si (v2df)
     long long __builtin_ia32_cvtsd2si64 (v2df)
     long long __builtin_ia32_cvttsd2si64 (v2df)
     v4si __builtin_ia32_cvtps2dq (v4sf)
     v2df __builtin_ia32_cvtps2pd (v4sf)
     v4si __builtin_ia32_cvttps2dq (v4sf)
     v2df __builtin_ia32_cvtsi2sd (v2df, int)
     v2df __builtin_ia32_cvtsi642sd (v2df, long long)
     v4sf __builtin_ia32_cvtsd2ss (v4sf, v2df)
     v2df __builtin_ia32_cvtss2sd (v2df, v4sf)
     void __builtin_ia32_clflush (const void *)
     void __builtin_ia32_lfence (void)
     void __builtin_ia32_mfence (void)
     v16qi __builtin_ia32_loaddqu (const char *)
     void __builtin_ia32_storedqu (char *, v16qi)
     unsigned long long __builtin_ia32_pmuludq (v2si, v2si)
     v2di __builtin_ia32_pmuludq128 (v4si, v4si)
     v8hi __builtin_ia32_psllw128 (v8hi, v2di)
     v4si __builtin_ia32_pslld128 (v4si, v2di)
     v2di __builtin_ia32_psllq128 (v4si, v2di)
     v8hi __builtin_ia32_psrlw128 (v8hi, v2di)
     v4si __builtin_ia32_psrld128 (v4si, v2di)
     v2di __builtin_ia32_psrlq128 (v2di, v2di)
     v8hi __builtin_ia32_psraw128 (v8hi, v2di)
     v4si __builtin_ia32_psrad128 (v4si, v2di)
     v2di __builtin_ia32_pslldqi128 (v2di, int)
     v8hi __builtin_ia32_psllwi128 (v8hi, int)
     v4si __builtin_ia32_pslldi128 (v4si, int)
     v2di __builtin_ia32_psllqi128 (v2di, int)
     v2di __builtin_ia32_psrldqi128 (v2di, int)
     v8hi __builtin_ia32_psrlwi128 (v8hi, int)
     v4si __builtin_ia32_psrldi128 (v4si, int)
     v2di __builtin_ia32_psrlqi128 (v2di, int)
     v8hi __builtin_ia32_psrawi128 (v8hi, int)
     v4si __builtin_ia32_psradi128 (v4si, int)
     v4si __builtin_ia32_pmaddwd128 (v8hi, v8hi)

The following built-in functions are available when -msse3 is used. All of them generate the machine instruction that is part of the name.

     v2df __builtin_ia32_addsubpd (v2df, v2df)
     v4sf __builtin_ia32_addsubps (v4sf, v4sf)
     v2df __builtin_ia32_haddpd (v2df, v2df)
     v4sf __builtin_ia32_haddps (v4sf, v4sf)
     v2df __builtin_ia32_hsubpd (v2df, v2df)
     v4sf __builtin_ia32_hsubps (v4sf, v4sf)
     v16qi __builtin_ia32_lddqu (char const *)
     void __builtin_ia32_monitor (void *, unsigned int, unsigned int)
     v2df __builtin_ia32_movddup (v2df)
     v4sf __builtin_ia32_movshdup (v4sf)
     v4sf __builtin_ia32_movsldup (v4sf)
     void __builtin_ia32_mwait (unsigned int, unsigned int)

The following built-in functions are available when -msse3 is used.

v2df __builtin_ia32_loadddup (double const *)
Generates the movddup machine instruction as a load from memory.

The following built-in functions are available when -mssse3 is used. All of them generate the machine instruction that is part of the name with MMX registers.

     v2si __builtin_ia32_phaddd (v2si, v2si)
     v4hi __builtin_ia32_phaddw (v4hi, v4hi)
     v4hi __builtin_ia32_phaddsw (v4hi, v4hi)
     v2si __builtin_ia32_phsubd (v2si, v2si)
     v4hi __builtin_ia32_phsubw (v4hi, v4hi)
     v4hi __builtin_ia32_phsubsw (v4hi, v4hi)
     v8qi __builtin_ia32_pmaddubsw (v8qi, v8qi)
     v4hi __builtin_ia32_pmulhrsw (v4hi, v4hi)
     v8qi __builtin_ia32_pshufb (v8qi, v8qi)
     v8qi __builtin_ia32_psignb (v8qi, v8qi)
     v2si __builtin_ia32_psignd (v2si, v2si)
     v4hi __builtin_ia32_psignw (v4hi, v4hi)
     v1di __builtin_ia32_palignr (v1di, v1di, int)
     v8qi __builtin_ia32_pabsb (v8qi)
     v2si __builtin_ia32_pabsd (v2si)
     v4hi __builtin_ia32_pabsw (v4hi)

The following built-in functions are available when -mssse3 is used. All of them generate the machine instruction that is part of the name with SSE registers.

     v4si __builtin_ia32_phaddd128 (v4si, v4si)
     v8hi __builtin_ia32_phaddw128 (v8hi, v8hi)
     v8hi __builtin_ia32_phaddsw128 (v8hi, v8hi)
     v4si __builtin_ia32_phsubd128 (v4si, v4si)
     v8hi __builtin_ia32_phsubw128 (v8hi, v8hi)
     v8hi __builtin_ia32_phsubsw128 (v8hi, v8hi)
     v16qi __builtin_ia32_pmaddubsw128 (v16qi, v16qi)
     v8hi __builtin_ia32_pmulhrsw128 (v8hi, v8hi)
     v16qi __builtin_ia32_pshufb128 (v16qi, v16qi)
     v16qi __builtin_ia32_psignb128 (v16qi, v16qi)
     v4si __builtin_ia32_psignd128 (v4si, v4si)
     v8hi __builtin_ia32_psignw128 (v8hi, v8hi)
     v2di __builtin_ia32_palignr (v2di, v2di, int)
     v16qi __builtin_ia32_pabsb128 (v16qi)
     v4si __builtin_ia32_pabsd128 (v4si)
     v8hi __builtin_ia32_pabsw128 (v8hi)

The following built-in functions are available when -m3dnow is used. All of them generate the machine instruction that is part of the name.

     void __builtin_ia32_femms (void)
     v8qi __builtin_ia32_pavgusb (v8qi, v8qi)
     v2si __builtin_ia32_pf2id (v2sf)
     v2sf __builtin_ia32_pfacc (v2sf, v2sf)
     v2sf __builtin_ia32_pfadd (v2sf, v2sf)
     v2si __builtin_ia32_pfcmpeq (v2sf, v2sf)
     v2si __builtin_ia32_pfcmpge (v2sf, v2sf)
     v2si __builtin_ia32_pfcmpgt (v2sf, v2sf)
     v2sf __builtin_ia32_pfmax (v2sf, v2sf)
     v2sf __builtin_ia32_pfmin (v2sf, v2sf)
     v2sf __builtin_ia32_pfmul (v2sf, v2sf)
     v2sf __builtin_ia32_pfrcp (v2sf)
     v2sf __builtin_ia32_pfrcpit1 (v2sf, v2sf)
     v2sf __builtin_ia32_pfrcpit2 (v2sf, v2sf)
     v2sf __builtin_ia32_pfrsqrt (v2sf)
     v2sf __builtin_ia32_pfrsqrtit1 (v2sf, v2sf)
     v2sf __builtin_ia32_pfsub (v2sf, v2sf)
     v2sf __builtin_ia32_pfsubr (v2sf, v2sf)
     v2sf __builtin_ia32_pi2fd (v2si)
     v4hi __builtin_ia32_pmulhrw (v4hi, v4hi)

The following built-in functions are available when both -m3dnow and -march=athlon are used. All of them generate the machine instruction that is part of the name.

     v2si __builtin_ia32_pf2iw (v2sf)
     v2sf __builtin_ia32_pfnacc (v2sf, v2sf)
     v2sf __builtin_ia32_pfpnacc (v2sf, v2sf)
     v2sf __builtin_ia32_pi2fw (v2si)
     v2sf __builtin_ia32_pswapdsf (v2sf)
     v2si __builtin_ia32_pswapdsi (v2si)