Next: , Previous: FR-V Built-in Functions, Up: Target Builtins


5.48.4 X86 Built-in Functions

These built-in functions are available for the i386 and x86-64 family of computers, depending on the command-line switches used.

The following machine modes are available for use with MMX built-in functions (see Vector Extensions): V2SI for a vector of two 32-bit integers, V4HI for a vector of four 16-bit integers, and V8QI for a vector of eight 8-bit integers. Some of the built-in functions operate on MMX registers as a whole 64-bit entity; these use V1DI as their mode.

If 3DNow! extensions are enabled, V2SF is used as a mode for a vector of two 32-bit floating-point values.

If SSE extensions are enabled, V4SF is used for a vector of four 32-bit floating-point values. Some instructions use a vector of four 32-bit integers; these use V4SI. Finally, some instructions operate on an entire vector register, interpreting it as a 128-bit integer; these use mode TI.

The following built-in functions are made available by -mmmx. All of them generate the machine instruction that is part of the name.

     v8qi __builtin_ia32_paddb (v8qi, v8qi)
     v4hi __builtin_ia32_paddw (v4hi, v4hi)
     v2si __builtin_ia32_paddd (v2si, v2si)
     
     v1di __builtin_ia32_paddq (v1di, v1di)
     
     v8qi __builtin_ia32_psubb (v8qi, v8qi)
     v4hi __builtin_ia32_psubw (v4hi, v4hi)
     v2si __builtin_ia32_psubd (v2si, v2si)
     
     v1di __builtin_ia32_psubq (v1di, v1di)
     
     v8qi __builtin_ia32_paddsb (v8qi, v8qi)
     v4hi __builtin_ia32_paddsw (v4hi, v4hi)
     v8qi __builtin_ia32_psubsb (v8qi, v8qi)
     v4hi __builtin_ia32_psubsw (v4hi, v4hi)
     v8qi __builtin_ia32_paddusb (v8qi, v8qi)
     v4hi __builtin_ia32_paddusw (v4hi, v4hi)
     v8qi __builtin_ia32_psubusb (v8qi, v8qi)
     v4hi __builtin_ia32_psubusw (v4hi, v4hi)
     v4hi __builtin_ia32_pmullw (v4hi, v4hi)
     v4hi __builtin_ia32_pmulhw (v4hi, v4hi)
     v1di __builtin_ia32_pand (v1di, v1di)
     v1di __builtin_ia32_pandn (v1di, v1di)
     v1di __builtin_ia32_por (v1di, v1di)
     v1di __builtin_ia32_pxor (v1di, v1di)
     v8qi __builtin_ia32_pcmpeqb (v8qi, v8qi)
     v4hi __builtin_ia32_pcmpeqw (v4hi, v4hi)
     v2si __builtin_ia32_pcmpeqd (v2si, v2si)
     v8qi __builtin_ia32_pcmpgtb (v8qi, v8qi)
     v4hi __builtin_ia32_pcmpgtw (v4hi, v4hi)
     v2si __builtin_ia32_pcmpgtd (v2si, v2si)
     v8qi __builtin_ia32_punpckhbw (v8qi, v8qi)
     v4hi __builtin_ia32_punpckhwd (v4hi, v4hi)
     v2si __builtin_ia32_punpckhdq (v2si, v2si)
     v8qi __builtin_ia32_punpcklbw (v8qi, v8qi)
     v4hi __builtin_ia32_punpcklwd (v4hi, v4hi)
     v2si __builtin_ia32_punpckldq (v2si, v2si)
     v8qi __builtin_ia32_packsswb (v4hi, v4hi)
     v4hi __builtin_ia32_packssdw (v2si, v2si)
     v8qi __builtin_ia32_packuswb (v4hi, v4hi)
     
     void __builtin_ia32_emms (void)
     v4hi __builtin_ia32_psllw (v4hi, v1di)
     v4hi __builtin_ia32_psllwi (v4hi, int)
     v2si __builtin_ia32_pslld (v2si, v1di)
     v2si __builtin_ia32_pslldi (v2si, int)
     v1di __builtin_ia32_psllq (v1di, v1di)
     
     v1di __builtin_ia32_psllqi (v1di, int)
     v4hi __builtin_ia32_psrlw (v4hi, v1di)
     v4hi __builtin_ia32_psrlwi (v4hi, int)
     v2si __builtin_ia32_psrld (v2si, v1di)
     v2si __builtin_ia32_psrldi (v2si, int)
     v1di __builtin_ia32_psrlq (v1di, v1di)
     
     v1di __builtin_ia32_psrlqi (v1di, int)
     v4hi __builtin_ia32_psraw (v4hi, v1di)
     v4hi __builtin_ia32_psrawi (v4hi, int)
     v2si __builtin_ia32_psrad (v2si, v1di)
     v2si __builtin_ia32_psradi (v2si, int)
     v4hi __builtin_ia32_pshufw (v4hi, int)
     v2si __builtin_ia32_pmaddwd (v4hi, v4hi)
     v2si __builtin_ia32_vec_init_v2si (int, int)
     v4hi __builtin_ia32_vec_init_v4hi (short, short, short, short)
     v8qi __builtin_ia32_vec_init_v8qi (char, char, char, char, char, char, char, char)
     int __builtin_ia32_vec_ext_v2si (v2si, int)
     

The following built-in functions are made available either with -msse, or with a combination of -m3dnow and -march=athlon. All of them generate the machine instruction that is part of the name.

     v4hi __builtin_ia32_pmulhuw (v4hi, v4hi)
     v8qi __builtin_ia32_pavgb (v8qi, v8qi)
     v4hi __builtin_ia32_pavgw (v4hi, v4hi)
     v4hi __builtin_ia32_psadbw (v8qi, v8qi)
     v8qi __builtin_ia32_pmaxub (v8qi, v8qi)
     v4hi __builtin_ia32_pmaxsw (v4hi, v4hi)
     v8qi __builtin_ia32_pminub (v8qi, v8qi)
     v4hi __builtin_ia32_pminsw (v4hi, v4hi)
     int __builtin_ia32_pextrw (v4hi, int)
     v4hi __builtin_ia32_pinsrw (v4hi, int, int)
     int __builtin_ia32_pmovmskb (v8qi)
     void __builtin_ia32_maskmovq (v8qi, v8qi, char *)
     void __builtin_ia32_movntq (v1di *, v1di)
     void __builtin_ia32_sfence (void)
     
     int __builtin_ia32_vec_ext_v4hi (v4hi, int)
     v4hi __builtin_ia32_vec_set_v4hi (v4hi, int, int)
     

The following built-in functions are available when -msse is used. All of them generate the machine instruction that is part of the name.

     int __builtin_ia32_comieq (v4sf, v4sf)
     int __builtin_ia32_comineq (v4sf, v4sf)
     int __builtin_ia32_comilt (v4sf, v4sf)
     int __builtin_ia32_comile (v4sf, v4sf)
     int __builtin_ia32_comigt (v4sf, v4sf)
     int __builtin_ia32_comige (v4sf, v4sf)
     int __builtin_ia32_ucomieq (v4sf, v4sf)
     int __builtin_ia32_ucomineq (v4sf, v4sf)
     int __builtin_ia32_ucomilt (v4sf, v4sf)
     int __builtin_ia32_ucomile (v4sf, v4sf)
     int __builtin_ia32_ucomigt (v4sf, v4sf)
     int __builtin_ia32_ucomige (v4sf, v4sf)
     v4sf __builtin_ia32_addps (v4sf, v4sf)
     v4sf __builtin_ia32_subps (v4sf, v4sf)
     v4sf __builtin_ia32_mulps (v4sf, v4sf)
     v4sf __builtin_ia32_divps (v4sf, v4sf)
     v4sf __builtin_ia32_addss (v4sf, v4sf)
     v4sf __builtin_ia32_subss (v4sf, v4sf)
     v4sf __builtin_ia32_mulss (v4sf, v4sf)
     v4sf __builtin_ia32_divss (v4sf, v4sf)
     v4si __builtin_ia32_cmpeqps (v4sf, v4sf)
     v4si __builtin_ia32_cmpltps (v4sf, v4sf)
     v4si __builtin_ia32_cmpleps (v4sf, v4sf)
     v4si __builtin_ia32_cmpgtps (v4sf, v4sf)
     v4si __builtin_ia32_cmpgeps (v4sf, v4sf)
     v4si __builtin_ia32_cmpunordps (v4sf, v4sf)
     v4si __builtin_ia32_cmpneqps (v4sf, v4sf)
     v4si __builtin_ia32_cmpnltps (v4sf, v4sf)
     v4si __builtin_ia32_cmpnleps (v4sf, v4sf)
     v4si __builtin_ia32_cmpngtps (v4sf, v4sf)
     v4si __builtin_ia32_cmpngeps (v4sf, v4sf)
     v4si __builtin_ia32_cmpordps (v4sf, v4sf)
     v4si __builtin_ia32_cmpeqss (v4sf, v4sf)
     v4si __builtin_ia32_cmpltss (v4sf, v4sf)
     v4si __builtin_ia32_cmpless (v4sf, v4sf)
     v4si __builtin_ia32_cmpunordss (v4sf, v4sf)
     v4si __builtin_ia32_cmpneqss (v4sf, v4sf)
     v4si __builtin_ia32_cmpnltss (v4sf, v4sf)
     v4si __builtin_ia32_cmpnless (v4sf, v4sf)
     v4si __builtin_ia32_cmpordss (v4sf, v4sf)
     v4sf __builtin_ia32_maxps (v4sf, v4sf)
     v4sf __builtin_ia32_maxss (v4sf, v4sf)
     v4sf __builtin_ia32_minps (v4sf, v4sf)
     v4sf __builtin_ia32_minss (v4sf, v4sf)
     v4sf __builtin_ia32_andps (v4sf, v4sf)
     v4sf __builtin_ia32_andnps (v4sf, v4sf)
     v4sf __builtin_ia32_orps (v4sf, v4sf)
     v4sf __builtin_ia32_xorps (v4sf, v4sf)
     v4sf __builtin_ia32_movss (v4sf, v4sf)
     v4sf __builtin_ia32_movhlps (v4sf, v4sf)
     v4sf __builtin_ia32_movlhps (v4sf, v4sf)
     v4sf __builtin_ia32_unpckhps (v4sf, v4sf)
     v4sf __builtin_ia32_unpcklps (v4sf, v4sf)
     v4sf __builtin_ia32_cvtpi2ps (v4sf, v2si)
     v4sf __builtin_ia32_cvtsi2ss (v4sf, int)
     v2si __builtin_ia32_cvtps2pi (v4sf)
     int __builtin_ia32_cvtss2si (v4sf)
     v2si __builtin_ia32_cvttps2pi (v4sf)
     int __builtin_ia32_cvttss2si (v4sf)
     v4sf __builtin_ia32_rcpps (v4sf)
     v4sf __builtin_ia32_rsqrtps (v4sf)
     v4sf __builtin_ia32_sqrtps (v4sf)
     v4sf __builtin_ia32_rcpss (v4sf)
     v4sf __builtin_ia32_rsqrtss (v4sf)
     v4sf __builtin_ia32_sqrtss (v4sf)
     v4sf __builtin_ia32_shufps (v4sf, v4sf, int)
     void __builtin_ia32_movntps (float *, v4sf)
     int __builtin_ia32_movmskps (v4sf)
     
     void __builtin_ia32_ldmxcsr (unsigned)
     unsigned __builtin_ia32_stmxcsr (void)
     v2df __builtin_ia32_vec_ext_v2df (v2df, int)
     v2di __builtin_ia32_vec_ext_v2di (v2di, int)
     v4sf __builtin_ia32_vec_ext_v4sf (v4sf, int)
     v4si __builtin_ia32_vec_ext_v4si (v4si, int)
     v8hi __builtin_ia32_vec_set_v8hi (v8hi, int, int)
     unsigned int __builtin_ia32_vec_ext_v8hi (v8hi, int)
     

The following built-in functions are available when -msse is used.

v4sf __builtin_ia32_loadaps (float *)
Generates the movaps machine instruction as a load from memory.
void __builtin_ia32_storeaps (float *, v4sf)
Generates the movaps machine instruction as a store to memory.
v4sf __builtin_ia32_loadups (float *)
Generates the movups machine instruction as a load from memory.
void __builtin_ia32_storeups (float *, v4sf)
Generates the movups machine instruction as a store to memory.
v4sf __builtin_ia32_loadss (float *)
Generates the movss machine instruction as a load from memory.
void __builtin_ia32_storess (float *, v4sf)
Generates the movss machine instruction as a store to memory.
v4sf __builtin_ia32_loadhps (v4sf, v2si *)
Generates the movhps machine instruction as a load from memory.
v4sf __builtin_ia32_loadlps (v4sf, v2si *)
Generates the movlps machine instruction as a load from memory.
void __builtin_ia32_storehps (v4sf, v2si *)
Generates the movhps machine instruction as a store to memory.
void __builtin_ia32_storelps (v4sf, v2si *)
Generates the movlps machine instruction as a store to memory.

The following built-in functions are available when -msse2 is used. All of them generate the machine instruction that is part of the name.

     v2df __builtin_ia32_addpd (v2df, v2df)
     v2df __builtin_ia32_subpd (v2df, v2df)
     v2df __builtin_ia32_mulpd (v2df, v2df)
     v2df __builtin_ia32_divpd (v2df, v2df)
     v2df __builtin_ia32_addsd (v2df, v2df)
     v2df __builtin_ia32_subsd (v2df, v2df)
     v2df __builtin_ia32_mulsd (v2df, v2df)
     v2df __builtin_ia32_divsd (v2df, v2df)
     v2df __builtin_ia32_cmpeqpd (v2df, v2df)
     v2df __builtin_ia32_cmpltpd (v2df, v2df)
     v2df __builtin_ia32_cmplepd (v2df, v2df)
     v2df __builtin_ia32_cmpgtpd (v2df, v2df)
     v2df __builtin_ia32_cmpgepd (v2df, v2df)
     v2df __builtin_ia32_cmpunordpd (v2df, v2df)
     v2df __builtin_ia32_cmpneqpd (v2df, v2df)
     v2df __builtin_ia32_cmpnltpd (v2df, v2df)
     v2df __builtin_ia32_cmpnlepd (v2df, v2df)
     v2df __builtin_ia32_cmpngtpd (v2df, v2df)
     v2df __builtin_ia32_cmpngepd (v2df, v2df)
     v2df __builtin_ia32_cmpordpd (v2df, v2df)
     v2df __builtin_ia32_cmpeqsd (v2df, v2df)
     v2df __builtin_ia32_cmpltsd (v2df, v2df)
     v2df __builtin_ia32_cmplesd (v2df, v2df)
     v2df __builtin_ia32_cmpunordsd (v2df, v2df)
     v2df __builtin_ia32_cmpneqsd (v2df, v2df)
     v2df __builtin_ia32_cmpnltsd (v2df, v2df)
     v2df __builtin_ia32_cmpnlesd (v2df, v2df)
     v2df __builtin_ia32_cmpordsd (v2df, v2df)
     v2df __builtin_ia32_minpd (v2df, v2df)
     v2df __builtin_ia32_maxpd (v2df, v2df)
     v2df __builtin_ia32_minsd (v2df, v2df)
     v2df __builtin_ia32_maxsd (v2df, v2df)
     v2df __builtin_ia32_andpd (v2df, v2df)
     v2df __builtin_ia32_andnpd (v2df, v2df)
     v2df __builtin_ia32_orpd (v2df, v2df)
     v2df __builtin_ia32_xorpd (v2df, v2df)
     v2df __builtin_ia32_movsd (v2df, v2df)
     v2df __builtin_ia32_unpckhpd (v2df, v2df)
     v2df __builtin_ia32_unpcklpd (v2df, v2df)
     v16qi __builtin_ia32_paddb128 (v16qi, v16qi)
     v8hi __builtin_ia32_paddw128 (v8hi, v8hi)
     v4si __builtin_ia32_paddd128 (v4si, v4si)
     v2di __builtin_ia32_paddq128 (v2di, v2di)
     v16qi __builtin_ia32_psubb128 (v16qi, v16qi)
     v8hi __builtin_ia32_psubw128 (v8hi, v8hi)
     v4si __builtin_ia32_psubd128 (v4si, v4si)
     v2di __builtin_ia32_psubq128 (v2di, v2di)
     v16qi __builtin_ia32_paddsb128 (v16qi, v16qi)
     v8hi __builtin_ia32_paddsw128 (v8hi, v8hi)
     v16qi __builtin_ia32_psubsb128 (v16qi, v16qi)
     v8hi __builtin_ia32_psubsw128 (v8hi, v8hi)
     v16qi __builtin_ia32_paddusb128 (v16qi, v16qi)
     v8hi __builtin_ia32_paddusw128 (v8hi, v8hi)
     v16qi __builtin_ia32_psubusb128 (v16qi, v16qi)
     v8hi __builtin_ia32_psubusw128 (v8hi, v8hi)
     v8hi __builtin_ia32_pmullw128 (v8hi, v8hi)
     v8hi __builtin_ia32_pmulhw128 (v8hi, v8hi)
     v2di __builtin_ia32_pand128 (v2di, v2di)
     v2di __builtin_ia32_pandn128 (v2di, v2di)
     v2di __builtin_ia32_por128 (v2di, v2di)
     v2di __builtin_ia32_pxor128 (v2di, v2di)
     v16qi __builtin_ia32_pavgb128 (v16qi, v16qi)
     v8hi __builtin_ia32_pavgw128 (v8hi, v8hi)
     v16qi __builtin_ia32_pcmpeqb128 (v16qi, v16qi)
     v8hi __builtin_ia32_pcmpeqw128 (v8hi, v8hi)
     v4si __builtin_ia32_pcmpeqd128 (v4si, v4si)
     v16qi __builtin_ia32_pcmpgtb128 (v16qi, v16qi)
     v8hi __builtin_ia32_pcmpgtw128 (v8hi, v8hi)
     v4si __builtin_ia32_pcmpgtd128 (v4si, v4si)
     v16qi __builtin_ia32_pmaxub128 (v16qi, v16qi)
     v8hi __builtin_ia32_pmaxsw128 (v8hi, v8hi)
     v16qi __builtin_ia32_pminub128 (v16qi, v16qi)
     v8hi __builtin_ia32_pminsw128 (v8hi, v8hi)
     v16qi __builtin_ia32_punpckhbw128 (v16qi, v16qi)
     v8hi __builtin_ia32_punpckhwd128 (v8hi, v8hi)
     v4si __builtin_ia32_punpckhdq128 (v4si, v4si)
     v2di __builtin_ia32_punpckhqdq128 (v2di, v2di)
     v16qi __builtin_ia32_punpcklbw128 (v16qi, v16qi)
     v8hi __builtin_ia32_punpcklwd128 (v8hi, v8hi)
     v4si __builtin_ia32_punpckldq128 (v4si, v4si)
     v2di __builtin_ia32_punpcklqdq128 (v2di, v2di)
     v16qi __builtin_ia32_packsswb128 (v8hi, v8hi)
     v8hi __builtin_ia32_packssdw128 (v4si, v4si)
     v16qi __builtin_ia32_packuswb128 (v8hi, v8hi)
     v8hi __builtin_ia32_pmulhuw128 (v8hi, v8hi)
     v8hi __builtin_ia32_psllwi128 (v8hi, int)
     v4si __builtin_ia32_pslldi128 (v4si, int)
     v2di __builtin_ia32_psllqi128 (v2di, int)
     v8hi __builtin_ia32_psrlwi128 (v8hi, int)
     v4si __builtin_ia32_psrldi128 (v4si, int)
     v2di __builtin_ia32_psrlqi128 (v2di, int)
     v8hi __builtin_ia32_psrawi128 (v8hi, int)
     v4si __builtin_ia32_psradi128 (v4si, int)
     v4si __builtin_ia32_pmaddwd128 (v8hi, v8hi)
     int __builtin_ia32_pmovmskb128 (v16qi)
     int __builtin_ia32_movmskpd (v2df)
     v2df __builtin_ia32_sqrtpd (v2df)
     v2df __builtin_ia32_cvtdq2pd (v4si)
     v4sf __builtin_ia32_cvtdq2ps (v4si)
     v4si __builtin_ia32_cvtpd2dq (v2df)
     v2si __builtin_ia32_cvtpd2pi (v2df)
     v4sf __builtin_ia32_cvtpd2ps (v2df)
     v4si __builtin_ia32_cvtpd2dq (v2df)
     v2si __builtin_ia32_cvtpd2pi (v2df)
     int __builtin_ia32_cvtsd2si (v2df)
     v2df __builtin_ia32_cvtps2pd (v4sf)
     void __builtin_ia32_maskmovdqu (v16qi, v16qi, char *)
     v2df __builtin_ia32_loadupd (double const *)
     void __builtin_ia32_storeupd (double *, v2df)
     v2df __builtin_ia32_loadhpd (v2df, double const *)
     v2df __builtin_ia32_loadlpd (v2df, double const *)
     void __builtin_ia32_movnti (int *, int)
     void __builtin_ia32_movntpd (double *, v2df)
     void __builtin_ia32_movntdq (v2di *, v2di)
     v4si __builtin_ia32_pshufd (v4si, int)
     v8hi __builtin_ia32_pshuflw (v8hi, int)
     v8hi __builtin_ia32_pshufhw (v8hi, int)
     v2di __builtin_ia32_psadbw128 (v16qi, v16qi)
     v2df __builtin_ia32_sqrtpd (v2df)
     v2df __builtin_ia32_sqrtsd (v2df)
     v2df __builtin_ia32_shufpd (v2df, v2df, int)
     v2df __builtin_ia32_cvtdq2pd (v4si)
     v4sf __builtin_ia32_cvtdq2ps (v4si)
     v4si __builtin_ia32_cvtpd2dq (v2df)
     v2si __builtin_ia32_cvtpd2pi (v2df)
     v4sf __builtin_ia32_cvtpd2ps (v2df)
     v4si __builtin_ia32_cvttpd2dq (v2df)
     v2si __builtin_ia32_cvttpd2pi (v2df)
     v2df __builtin_ia32_cvtpi2pd (v2si)
     int __builtin_ia32_cvtsd2si (v2df)
     int __builtin_ia32_cvttsd2si (v2df)
     long long __builtin_ia32_cvtsd2si64 (v2df)
     long long __builtin_ia32_cvttsd2si64 (v2df)
     v4si __builtin_ia32_cvtps2dq (v4sf)
     v2df __builtin_ia32_cvtps2pd (v4sf)
     v4si __builtin_ia32_cvttps2dq (v4sf)
     v2df __builtin_ia32_cvtsi2sd (v2df, int)
     v2df __builtin_ia32_cvtsi642sd (v2df, long long)
     v4sf __builtin_ia32_cvtsd2ss (v4sf, v2df)
     v2df __builtin_ia32_cvtss2sd (v2df, v4sf)
     void __builtin_ia32_clflush (void const *)
     void __builtin_ia32_lfence (void)
     void __builtin_ia32_mfence (void)
     v16qi __builtin_ia32_loaddqu (char const *)
     void __builtin_ia32_storedqu (char *, v16qi)
     v1di __builtin_ia32_pmuludq (v2si, v2si)
     v2di __builtin_ia32_pmuludq128 (v4si, v4si)
     v8hi __builtin_ia32_psllw128 (v8hi, v2di)
     v4si __builtin_ia32_pslld128 (v4si, v2di)
     v2di __builtin_ia32_psllq128 (v2di, v2di)
     v8hi __builtin_ia32_psrlw128 (v8hi, v2di)
     v4si __builtin_ia32_psrld128 (v4si, v2di)
     v2di __builtin_ia32_psrlq128 (v2di, v2di)
     v8hi __builtin_ia32_psraw128 (v8hi, v2di)
     v4si __builtin_ia32_psrad128 (v4si, v2di)
     v2di __builtin_ia32_pslldqi128 (v2di, int)
     v8hi __builtin_ia32_psllwi128 (v8hi, int)
     v4si __builtin_ia32_pslldi128 (v4si, int)
     v2di __builtin_ia32_psllqi128 (v2di, int)
     v2di __builtin_ia32_psrldqi128 (v2di, int)
     v8hi __builtin_ia32_psrlwi128 (v8hi, int)
     v4si __builtin_ia32_psrldi128 (v4si, int)
     v2di __builtin_ia32_psrlqi128 (v2di, int)
     v8hi __builtin_ia32_psrawi128 (v8hi, int)
     v4si __builtin_ia32_psradi128 (v4si, int)
     v4si __builtin_ia32_pmaddwd128 (v8hi, v8hi)

The following built-in functions are available when -msse3 is used. All of them generate the machine instruction that is part of the name.

     v2df __builtin_ia32_addsubpd (v2df, v2df)
     v4sf __builtin_ia32_addsubps (v4sf, v4sf)
     v2df __builtin_ia32_haddpd (v2df, v2df)
     v4sf __builtin_ia32_haddps (v4sf, v4sf)
     v2df __builtin_ia32_hsubpd (v2df, v2df)
     v4sf __builtin_ia32_hsubps (v4sf, v4sf)
     v16qi __builtin_ia32_lddqu (char const *)
     void __builtin_ia32_monitor (void *, unsigned int, unsigned int)
     v2df __builtin_ia32_movddup (v2df)
     v4sf __builtin_ia32_movshdup (v4sf)
     v4sf __builtin_ia32_movsldup (v4sf)
     void __builtin_ia32_mwait (unsigned int, unsigned int)

The following built-in functions are available when -msse3 is used.

v2df __builtin_ia32_loadddup (double const *)
Generates the movddup machine instruction as a load from memory.

The following built-in functions are available when -mssse3 is used. All of them generate the machine instruction that is part of the name with MMX registers.

     v2si __builtin_ia32_phaddd (v2si, v2si)
     v4hi __builtin_ia32_phaddw (v4hi, v4hi)
     v4hi __builtin_ia32_phaddsw (v4hi, v4hi)
     v2si __builtin_ia32_phsubd (v2si, v2si)
     v4hi __builtin_ia32_phsubw (v4hi, v4hi)
     v4hi __builtin_ia32_phsubsw (v4hi, v4hi)
     v8qi __builtin_ia32_pmaddubsw (v8qi, v8qi)
     v4hi __builtin_ia32_pmulhrsw (v4hi, v4hi)
     v8qi __builtin_ia32_pshufb (v8qi, v8qi)
     v8qi __builtin_ia32_psignb (v8qi, v8qi)
     v2si __builtin_ia32_psignd (v2si, v2si)
     v4hi __builtin_ia32_psignw (v4hi, v4hi)
     v1di __builtin_ia32_palignr (v1di, v1di, int)
     v8qi __builtin_ia32_pabsb (v8qi)
     v2si __builtin_ia32_pabsd (v2si)
     v4hi __builtin_ia32_pabsw (v4hi)

The following built-in functions are available when -mssse3 is used. All of them generate the machine instruction that is part of the name with SSE registers.

     v4si __builtin_ia32_phaddd128 (v4si, v4si)
     v8hi __builtin_ia32_phaddw128 (v8hi, v8hi)
     v8hi __builtin_ia32_phaddsw128 (v8hi, v8hi)
     v4si __builtin_ia32_phsubd128 (v4si, v4si)
     v8hi __builtin_ia32_phsubw128 (v8hi, v8hi)
     v8hi __builtin_ia32_phsubsw128 (v8hi, v8hi)
     v16qi __builtin_ia32_pmaddubsw128 (v16qi, v16qi)
     v8hi __builtin_ia32_pmulhrsw128 (v8hi, v8hi)
     v16qi __builtin_ia32_pshufb128 (v16qi, v16qi)
     v16qi __builtin_ia32_psignb128 (v16qi, v16qi)
     v4si __builtin_ia32_psignd128 (v4si, v4si)
     v8hi __builtin_ia32_psignw128 (v8hi, v8hi)
     v2di __builtin_ia32_palignr128 (v2di, v2di, int)
     v16qi __builtin_ia32_pabsb128 (v16qi)
     v4si __builtin_ia32_pabsd128 (v4si)
     v8hi __builtin_ia32_pabsw128 (v8hi)

The following built-in functions are available when -m3dnow is used. All of them generate the machine instruction that is part of the name.

     void __builtin_ia32_femms (void)
     v8qi __builtin_ia32_pavgusb (v8qi, v8qi)
     v2si __builtin_ia32_pf2id (v2sf)
     v2sf __builtin_ia32_pfacc (v2sf, v2sf)
     v2sf __builtin_ia32_pfadd (v2sf, v2sf)
     v2si __builtin_ia32_pfcmpeq (v2sf, v2sf)
     v2si __builtin_ia32_pfcmpge (v2sf, v2sf)
     v2si __builtin_ia32_pfcmpgt (v2sf, v2sf)
     v2sf __builtin_ia32_pfmax (v2sf, v2sf)
     v2sf __builtin_ia32_pfmin (v2sf, v2sf)
     v2sf __builtin_ia32_pfmul (v2sf, v2sf)
     v2sf __builtin_ia32_pfrcp (v2sf)
     v2sf __builtin_ia32_pfrcpit1 (v2sf, v2sf)
     v2sf __builtin_ia32_pfrcpit2 (v2sf, v2sf)
     v2sf __builtin_ia32_pfrsqrt (v2sf)
     v2sf __builtin_ia32_pfrsqrtit1 (v2sf, v2sf)
     v2sf __builtin_ia32_pfsub (v2sf, v2sf)
     v2sf __builtin_ia32_pfsubr (v2sf, v2sf)
     v2sf __builtin_ia32_pi2fd (v2si)
     v4hi __builtin_ia32_pmulhrw (v4hi, v4hi)

The following built-in functions are available when both -m3dnow and -march=athlon are used. All of them generate the machine instruction that is part of the name.

     v2si __builtin_ia32_pf2iw (v2sf)
     v2sf __builtin_ia32_pfnacc (v2sf, v2sf)
     v2sf __builtin_ia32_pfpnacc (v2sf, v2sf)
     v2sf __builtin_ia32_pi2fw (v2si)
     v2sf __builtin_ia32_pswapdsf (v2sf)
     v2si __builtin_ia32_pswapdsi (v2si)