; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=-avx,+sse2 | FileCheck %s --check-prefix=SSE
; RUN: llc < %s -mtriple=i386-apple-darwin -mcpu=knl | FileCheck %s --check-prefix=KNL

; llvm.x86.sse2.add.sd: expects addsd under the SSE run and the three-operand
; vaddsd under the KNL run.
define <2 x double> @test_x86_sse2_add_sd(<2 x double> %a0, <2 x double> %a1) {
; SSE-LABEL: test_x86_sse2_add_sd:
; SSE:       ## BB#0:
; SSE-NEXT:    addsd %xmm1, %xmm0
; SSE-NEXT:    retl
;
; KNL-LABEL: test_x86_sse2_add_sd:
; KNL:       ## BB#0:
; KNL-NEXT:    vaddsd %xmm1, %xmm0, %xmm0
; KNL-NEXT:    retl
  %res = call <2 x double> @llvm.x86.sse2.add.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.add.sd(<2 x double>, <2 x double>) nounwind readnone


; llvm.x86.sse2.cmp.pd with predicate immediate 7: the expected output spells
; the predicate as a mnemonic, cmpordpd (SSE) / vcmpordpd (KNL).
define <2 x double> @test_x86_sse2_cmp_pd(<2 x double> %a0, <2 x double> %a1) {
; SSE-LABEL: test_x86_sse2_cmp_pd:
; SSE:       ## BB#0:
; SSE-NEXT:    cmpordpd %xmm1, %xmm0
; SSE-NEXT:    retl
;
; KNL-LABEL: test_x86_sse2_cmp_pd:
; KNL:       ## BB#0:
; KNL-NEXT:    vcmpordpd %xmm1, %xmm0, %xmm0
; KNL-NEXT:    retl
  %res = call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %a0, <2 x double> %a1, i8 7) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double>, <2 x double>, i8) nounwind readnone


; llvm.x86.sse2.cmp.sd with predicate immediate 7: the expected output spells
; the predicate as a mnemonic, cmpordsd (SSE) / vcmpordsd (KNL).
define <2 x double> @test_x86_sse2_cmp_sd(<2 x double> %a0, <2 x double> %a1) {
; SSE-LABEL: test_x86_sse2_cmp_sd:
; SSE:       ## BB#0:
; SSE-NEXT:    cmpordsd %xmm1, %xmm0
; SSE-NEXT:    retl
;
; KNL-LABEL: test_x86_sse2_cmp_sd:
; KNL:       ## BB#0:
; KNL-NEXT:    vcmpordsd %xmm1, %xmm0, %xmm0
; KNL-NEXT:    retl
  %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 7) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double>, <2 x double>, i8) nounwind readnone


; llvm.x86.sse2.comieq.sd: expects (v)comisd followed by setnp and sete whose
; results are ANDed, then zero-extended into the i32 return register.
define i32 @test_x86_sse2_comieq_sd(<2 x double> %a0, <2 x double> %a1) {
; SSE-LABEL: test_x86_sse2_comieq_sd:
; SSE:       ## BB#0:
; SSE-NEXT:    comisd %xmm1, %xmm0
; SSE-NEXT:    setnp %al
; SSE-NEXT:    sete %cl
; SSE-NEXT:    andb %al, %cl
; SSE-NEXT:    movzbl %cl, %eax
; SSE-NEXT:    retl
;
; KNL-LABEL: test_x86_sse2_comieq_sd:
; KNL:       ## BB#0:
; KNL-NEXT:    vcomisd %xmm1, %xmm0
; KNL-NEXT:    setnp %al
; KNL-NEXT:    sete %cl
; KNL-NEXT:    andb %al, %cl
; KNL-NEXT:    movzbl %cl, %eax
; KNL-NEXT:    retl
  %res = call i32 @llvm.x86.sse2.comieq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse2.comieq.sd(<2 x double>, <2 x double>) nounwind readnone


; llvm.x86.sse2.comige.sd: expects %eax to be zeroed first, then
; (v)comisd %xmm1, %xmm0 with the result materialized by setae.
define i32 @test_x86_sse2_comige_sd(<2 x double> %a0, <2 x double> %a1) {
; SSE-LABEL: test_x86_sse2_comige_sd:
; SSE:       ## BB#0:
; SSE-NEXT:    xorl %eax, %eax
; SSE-NEXT:    comisd %xmm1, %xmm0
; SSE-NEXT:    setae %al
; SSE-NEXT:    retl
;
; KNL-LABEL: test_x86_sse2_comige_sd:
; KNL:       ## BB#0:
; KNL-NEXT:    xorl %eax, %eax
; KNL-NEXT:    vcomisd %xmm1, %xmm0
; KNL-NEXT:    setae %al
; KNL-NEXT:    retl
  %res = call i32 @llvm.x86.sse2.comige.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse2.comige.sd(<2 x double>, <2 x double>) nounwind readnone


; llvm.x86.sse2.comigt.sd: expects %eax to be zeroed first, then
; (v)comisd %xmm1, %xmm0 with the result materialized by seta.
define i32 @test_x86_sse2_comigt_sd(<2 x double> %a0, <2 x double> %a1) {
; SSE-LABEL: test_x86_sse2_comigt_sd:
; SSE:       ## BB#0:
; SSE-NEXT:    xorl %eax, %eax
; SSE-NEXT:    comisd %xmm1, %xmm0
; SSE-NEXT:    seta %al
; SSE-NEXT:    retl
;
; KNL-LABEL: test_x86_sse2_comigt_sd:
; KNL:       ## BB#0:
; KNL-NEXT:    xorl %eax, %eax
; KNL-NEXT:    vcomisd %xmm1, %xmm0
; KNL-NEXT:    seta %al
; KNL-NEXT:    retl
  %res = call i32 @llvm.x86.sse2.comigt.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse2.comigt.sd(<2 x double>, <2 x double>) nounwind readnone


; llvm.x86.sse2.comile.sd: the expected compare is (v)comisd %xmm0, %xmm1,
; i.e. the register operands appear in the opposite order from the comige
; test, and the result is materialized by setae.
define i32 @test_x86_sse2_comile_sd(<2 x double> %a0, <2 x double> %a1) {
; SSE-LABEL: test_x86_sse2_comile_sd:
; SSE:       ## BB#0:
; SSE-NEXT:    xorl %eax, %eax
; SSE-NEXT:    comisd %xmm0, %xmm1
; SSE-NEXT:    setae %al
; SSE-NEXT:    retl
;
; KNL-LABEL: test_x86_sse2_comile_sd:
; KNL:       ## BB#0:
; KNL-NEXT:    xorl %eax, %eax
; KNL-NEXT:    vcomisd %xmm0, %xmm1
; KNL-NEXT:    setae %al
; KNL-NEXT:    retl
  %res = call i32 @llvm.x86.sse2.comile.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse2.comile.sd(<2 x double>, <2 x double>) nounwind readnone


; llvm.x86.sse2.comilt.sd: the expected compare is (v)comisd %xmm0, %xmm1,
; i.e. the register operands appear in the opposite order from the comigt
; test, and the result is materialized by seta.
define i32 @test_x86_sse2_comilt_sd(<2 x double> %a0, <2 x double> %a1) {
; SSE-LABEL: test_x86_sse2_comilt_sd:
; SSE:       ## BB#0:
; SSE-NEXT:    xorl %eax, %eax
; SSE-NEXT:    comisd %xmm0, %xmm1
; SSE-NEXT:    seta %al
; SSE-NEXT:    retl
;
; KNL-LABEL: test_x86_sse2_comilt_sd:
; KNL:       ## BB#0:
; KNL-NEXT:    xorl %eax, %eax
; KNL-NEXT:    vcomisd %xmm0, %xmm1
; KNL-NEXT:    seta %al
; KNL-NEXT:    retl
  %res = call i32 @llvm.x86.sse2.comilt.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse2.comilt.sd(<2 x double>, <2 x double>) nounwind readnone


; llvm.x86.sse2.comineq.sd: expects (v)comisd followed by setp and setne whose
; results are ORed, then zero-extended into the i32 return register.
define i32 @test_x86_sse2_comineq_sd(<2 x double> %a0, <2 x double> %a1) {
; SSE-LABEL: test_x86_sse2_comineq_sd:
; SSE:       ## BB#0:
; SSE-NEXT:    comisd %xmm1, %xmm0
; SSE-NEXT:    setp %al
; SSE-NEXT:    setne %cl
; SSE-NEXT:    orb %al, %cl
; SSE-NEXT:    movzbl %cl, %eax
; SSE-NEXT:    retl
;
; KNL-LABEL: test_x86_sse2_comineq_sd:
; KNL:       ## BB#0:
; KNL-NEXT:    vcomisd %xmm1, %xmm0
; KNL-NEXT:    setp %al
; KNL-NEXT:    setne %cl
; KNL-NEXT:    orb %al, %cl
; KNL-NEXT:    movzbl %cl, %eax
; KNL-NEXT:    retl
  %res = call i32 @llvm.x86.sse2.comineq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse2.comineq.sd(<2 x double>, <2 x double>) nounwind readnone


; llvm.x86.sse2.cvtdq2ps (<4 x i32> -> <4 x float>): expects cvtdq2ps (SSE) /
; vcvtdq2ps (KNL), converting %xmm0 in place.
define <4 x float> @test_x86_sse2_cvtdq2ps(<4 x i32> %a0) {
; SSE-LABEL: test_x86_sse2_cvtdq2ps:
; SSE:       ## BB#0:
; SSE-NEXT:    cvtdq2ps %xmm0, %xmm0
; SSE-NEXT:    retl
;
; KNL-LABEL: test_x86_sse2_cvtdq2ps:
; KNL:       ## BB#0:
; KNL-NEXT:    vcvtdq2ps %xmm0, %xmm0
; KNL-NEXT:    retl
  %res = call <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32> %a0) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32>) nounwind readnone


; llvm.x86.sse2.cvtpd2dq (<2 x double> -> <4 x i32>): expects cvtpd2dq (SSE) /
; vcvtpd2dq (KNL), converting %xmm0 in place.
define <4 x i32> @test_x86_sse2_cvtpd2dq(<2 x double> %a0) {
; SSE-LABEL: test_x86_sse2_cvtpd2dq:
; SSE:       ## BB#0:
; SSE-NEXT:    cvtpd2dq %xmm0, %xmm0
; SSE-NEXT:    retl
;
; KNL-LABEL: test_x86_sse2_cvtpd2dq:
; KNL:       ## BB#0:
; KNL-NEXT:    vcvtpd2dq %xmm0, %xmm0
; KNL-NEXT:    retl
  %res = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> %a0) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double>) nounwind readnone


; llvm.x86.sse2.cvtpd2ps (<2 x double> -> <4 x float>): expects cvtpd2ps (SSE)
; / vcvtpd2ps (KNL), converting %xmm0 in place.
define <4 x float> @test_x86_sse2_cvtpd2ps(<2 x double> %a0) {
; SSE-LABEL: test_x86_sse2_cvtpd2ps:
; SSE:       ## BB#0:
; SSE-NEXT:    cvtpd2ps %xmm0, %xmm0
; SSE-NEXT:    retl
;
; KNL-LABEL: test_x86_sse2_cvtpd2ps:
; KNL:       ## BB#0:
; KNL-NEXT:    vcvtpd2ps %xmm0, %xmm0
; KNL-NEXT:    retl
  %res = call <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double> %a0) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double>) nounwind readnone


; llvm.x86.sse2.cvtps2dq (<4 x float> -> <4 x i32>): expects cvtps2dq (SSE) /
; vcvtps2dq (KNL), converting %xmm0 in place.
define <4 x i32> @test_x86_sse2_cvtps2dq(<4 x float> %a0) {
; SSE-LABEL: test_x86_sse2_cvtps2dq:
; SSE:       ## BB#0:
; SSE-NEXT:    cvtps2dq %xmm0, %xmm0
; SSE-NEXT:    retl
;
; KNL-LABEL: test_x86_sse2_cvtps2dq:
; KNL:       ## BB#0:
; KNL-NEXT:    vcvtps2dq %xmm0, %xmm0
; KNL-NEXT:    retl
  %res = call <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float> %a0) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float>) nounwind readnone


; llvm.x86.sse2.cvtsd2si (<2 x double> -> i32): expects cvtsd2si (SSE) /
; vcvtsd2si (KNL) writing the result to %eax.
define i32 @test_x86_sse2_cvtsd2si(<2 x double> %a0) {
; SSE-LABEL: test_x86_sse2_cvtsd2si:
; SSE:       ## BB#0:
; SSE-NEXT:    cvtsd2si %xmm0, %eax
; SSE-NEXT:    retl
;
; KNL-LABEL: test_x86_sse2_cvtsd2si:
; KNL:       ## BB#0:
; KNL-NEXT:    vcvtsd2si %xmm0, %eax
; KNL-NEXT:    retl
  %res = call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> %a0) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse2.cvtsd2si(<2 x double>) nounwind readnone


; llvm.x86.sse2.cvtsd2ss with both operands in registers: expects cvtsd2ss
; (SSE) / the three-operand vcvtsd2ss (KNL).
define <4 x float> @test_x86_sse2_cvtsd2ss(<4 x float> %a0, <2 x double> %a1) {
; SSE-LABEL: test_x86_sse2_cvtsd2ss:
; SSE:       ## BB#0:
; SSE-NEXT:    cvtsd2ss %xmm1, %xmm0
; SSE-NEXT:    retl
;
; KNL-LABEL: test_x86_sse2_cvtsd2ss:
; KNL:       ## BB#0:
; KNL-NEXT:    vcvtsd2ss %xmm1, %xmm0, %xmm0
; KNL-NEXT:    retl
  %res = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> %a0, <2 x double> %a1) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float>, <2 x double>) nounwind readnone


; llvm.x86.sse2.cvtsd2ss with the <2 x double> operand loaded through %p1.
; The SSE run expects a separate movaps load into %xmm1 before the convert;
; the KNL run expects the memory operand folded into vcvtsd2ss.
define <4 x float> @test_x86_sse2_cvtsd2ss_load(<4 x float> %a0, <2 x double>* %p1) {
; SSE-LABEL: test_x86_sse2_cvtsd2ss_load:
; SSE:       ## BB#0:
; SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; SSE-NEXT:    movaps (%eax), %xmm1
; SSE-NEXT:    cvtsd2ss %xmm1, %xmm0
; SSE-NEXT:    retl
;
; KNL-LABEL: test_x86_sse2_cvtsd2ss_load:
; KNL:       ## BB#0:
; KNL-NEXT:    movl {{[0-9]+}}(%esp), %eax
; KNL-NEXT:    vcvtsd2ss (%eax), %xmm0, %xmm0
; KNL-NEXT:    retl
  %a1 = load <2 x double>, <2 x double>* %p1
  %res = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> %a0, <2 x double> %a1) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}


; llvm.x86.sse2.cvtsi2sd: expects cvtsi2sdl (SSE) / vcvtsi2sdl (KNL) reading
; the i32 operand directly from its stack slot ({{[0-9]+}}(%esp)).
define <2 x double> @test_x86_sse2_cvtsi2sd(<2 x double> %a0, i32 %a1) {
; SSE-LABEL: test_x86_sse2_cvtsi2sd:
; SSE:       ## BB#0:
; SSE-NEXT:    cvtsi2sdl {{[0-9]+}}(%esp), %xmm0
; SSE-NEXT:    retl
;
; KNL-LABEL: test_x86_sse2_cvtsi2sd:
; KNL:       ## BB#0:
; KNL-NEXT:    vcvtsi2sdl {{[0-9]+}}(%esp), %xmm0, %xmm0
; KNL-NEXT:    retl
  %res = call <2 x double> @llvm.x86.sse2.cvtsi2sd(<2 x double> %a0, i32 %a1) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.cvtsi2sd(<2 x double>, i32) nounwind readnone


; llvm.x86.sse2.cvtss2sd with both operands in registers: expects cvtss2sd
; (SSE) / the three-operand vcvtss2sd (KNL).
define <2 x double> @test_x86_sse2_cvtss2sd(<2 x double> %a0, <4 x float> %a1) {
; SSE-LABEL: test_x86_sse2_cvtss2sd:
; SSE:       ## BB#0:
; SSE-NEXT:    cvtss2sd %xmm1, %xmm0
; SSE-NEXT:    retl
;
; KNL-LABEL: test_x86_sse2_cvtss2sd:
; KNL:       ## BB#0:
; KNL-NEXT:    vcvtss2sd %xmm1, %xmm0, %xmm0
; KNL-NEXT:    retl
  %res = call <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double> %a0, <4 x float> %a1) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double>, <4 x float>) nounwind readnone


; llvm.x86.sse2.cvtss2sd with the <4 x float> operand loaded through %p1.
; The SSE run expects a separate movaps load into %xmm1 before the convert;
; the KNL run expects the memory operand folded into vcvtss2sd.
define <2 x double> @test_x86_sse2_cvtss2sd_load(<2 x double> %a0, <4 x float>* %p1) {
; SSE-LABEL: test_x86_sse2_cvtss2sd_load:
; SSE:       ## BB#0:
; SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; SSE-NEXT:    movaps (%eax), %xmm1
; SSE-NEXT:    cvtss2sd %xmm1, %xmm0
; SSE-NEXT:    retl
;
; KNL-LABEL: test_x86_sse2_cvtss2sd_load:
; KNL:       ## BB#0:
; KNL-NEXT:    movl {{[0-9]+}}(%esp), %eax
; KNL-NEXT:    vcvtss2sd (%eax), %xmm0, %xmm0
; KNL-NEXT:    retl
  %a1 = load <4 x float>, <4 x float>* %p1
  %res = call <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double> %a0, <4 x float> %a1) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}


; llvm.x86.sse2.cvttpd2dq (<2 x double> -> <4 x i32>): expects cvttpd2dq (SSE)
; / vcvttpd2dq (KNL), converting %xmm0 in place.
define <4 x i32> @test_x86_sse2_cvttpd2dq(<2 x double> %a0) {
; SSE-LABEL: test_x86_sse2_cvttpd2dq:
; SSE:       ## BB#0:
; SSE-NEXT:    cvttpd2dq %xmm0, %xmm0
; SSE-NEXT:    retl
;
; KNL-LABEL: test_x86_sse2_cvttpd2dq:
; KNL:       ## BB#0:
; KNL-NEXT:    vcvttpd2dq %xmm0, %xmm0
; KNL-NEXT:    retl
  %res = call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> %a0) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double>) nounwind readnone


; llvm.x86.sse2.cvttps2dq (<4 x float> -> <4 x i32>): expects cvttps2dq (SSE)
; / vcvttps2dq (KNL), converting %xmm0 in place.
define <4 x i32> @test_x86_sse2_cvttps2dq(<4 x float> %a0) {
; SSE-LABEL: test_x86_sse2_cvttps2dq:
; SSE:       ## BB#0:
; SSE-NEXT:    cvttps2dq %xmm0, %xmm0
; SSE-NEXT:    retl
;
; KNL-LABEL: test_x86_sse2_cvttps2dq:
; KNL:       ## BB#0:
; KNL-NEXT:    vcvttps2dq %xmm0, %xmm0
; KNL-NEXT:    retl
  %res = call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %a0) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float>) nounwind readnone


; llvm.x86.sse2.cvttsd2si (<2 x double> -> i32): expects cvttsd2si (SSE) /
; vcvttsd2si (KNL) writing the result to %eax.
define i32 @test_x86_sse2_cvttsd2si(<2 x double> %a0) {
; SSE-LABEL: test_x86_sse2_cvttsd2si:
; SSE:       ## BB#0:
; SSE-NEXT:    cvttsd2si %xmm0, %eax
; SSE-NEXT:    retl
;
; KNL-LABEL: test_x86_sse2_cvttsd2si:
; KNL:       ## BB#0:
; KNL-NEXT:    vcvttsd2si %xmm0, %eax
; KNL-NEXT:    retl
  %res = call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> %a0) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse2.cvttsd2si(<2 x double>) nounwind readnone


; llvm.x86.sse2.div.sd: expects divsd under the SSE run and the three-operand
; vdivsd under the KNL run.
define <2 x double> @test_x86_sse2_div_sd(<2 x double> %a0, <2 x double> %a1) {
; SSE-LABEL: test_x86_sse2_div_sd:
; SSE:       ## BB#0:
; SSE-NEXT:    divsd %xmm1, %xmm0
; SSE-NEXT:    retl
;
; KNL-LABEL: test_x86_sse2_div_sd:
; KNL:       ## BB#0:
; KNL-NEXT:    vdivsd %xmm1, %xmm0, %xmm0
; KNL-NEXT:    retl
  %res = call <2 x double> @llvm.x86.sse2.div.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.div.sd(<2 x double>, <2 x double>) nounwind readnone



; llvm.x86.sse2.max.pd: expects maxpd under the SSE run and the three-operand
; vmaxpd under the KNL run.
define <2 x double> @test_x86_sse2_max_pd(<2 x double> %a0, <2 x double> %a1) {
; SSE-LABEL: test_x86_sse2_max_pd:
; SSE:       ## BB#0:
; SSE-NEXT:    maxpd %xmm1, %xmm0
; SSE-NEXT:    retl
;
; KNL-LABEL: test_x86_sse2_max_pd:
; KNL:       ## BB#0:
; KNL-NEXT:    vmaxpd %xmm1, %xmm0, %xmm0
; KNL-NEXT:    retl
  %res = call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.max.pd(<2 x double>, <2 x double>) nounwind readnone


; llvm.x86.sse2.max.sd: expects maxsd under the SSE run and the three-operand
; vmaxsd under the KNL run.
define <2 x double> @test_x86_sse2_max_sd(<2 x double> %a0, <2 x double> %a1) {
; SSE-LABEL: test_x86_sse2_max_sd:
; SSE:       ## BB#0:
; SSE-NEXT:    maxsd %xmm1, %xmm0
; SSE-NEXT:    retl
;
; KNL-LABEL: test_x86_sse2_max_sd:
; KNL:       ## BB#0:
; KNL-NEXT:    vmaxsd %xmm1, %xmm0, %xmm0
; KNL-NEXT:    retl
  %res = call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.max.sd(<2 x double>, <2 x double>) nounwind readnone


; llvm.x86.sse2.min.pd: expects minpd under the SSE run and the three-operand
; vminpd under the KNL run.
define <2 x double> @test_x86_sse2_min_pd(<2 x double> %a0, <2 x double> %a1) {
; SSE-LABEL: test_x86_sse2_min_pd:
; SSE:       ## BB#0:
; SSE-NEXT:    minpd %xmm1, %xmm0
; SSE-NEXT:    retl
;
; KNL-LABEL: test_x86_sse2_min_pd:
; KNL:       ## BB#0:
; KNL-NEXT:    vminpd %xmm1, %xmm0, %xmm0
; KNL-NEXT:    retl
  %res = call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.min.pd(<2 x double>, <2 x double>) nounwind readnone


; llvm.x86.sse2.min.sd: expects minsd under the SSE run and the three-operand
; vminsd under the KNL run.
define <2 x double> @test_x86_sse2_min_sd(<2 x double> %a0, <2 x double> %a1) {
; SSE-LABEL: test_x86_sse2_min_sd:
; SSE:       ## BB#0:
; SSE-NEXT:    minsd %xmm1, %xmm0
; SSE-NEXT:    retl
;
; KNL-LABEL: test_x86_sse2_min_sd:
; KNL:       ## BB#0:
; KNL-NEXT:    vminsd %xmm1, %xmm0, %xmm0
; KNL-NEXT:    retl
  %res = call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.min.sd(<2 x double>, <2 x double>) nounwind readnone


; llvm.x86.sse2.movmsk.pd (<2 x double> -> i32): expects movmskpd (SSE) /
; vmovmskpd (KNL) writing the result to %eax.
define i32 @test_x86_sse2_movmsk_pd(<2 x double> %a0) {
; SSE-LABEL: test_x86_sse2_movmsk_pd:
; SSE:       ## BB#0:
; SSE-NEXT:    movmskpd %xmm0, %eax
; SSE-NEXT:    retl
;
; KNL-LABEL: test_x86_sse2_movmsk_pd:
; KNL:       ## BB#0:
; KNL-NEXT:    vmovmskpd %xmm0, %eax
; KNL-NEXT:    retl
  %res = call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> %a0) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse2.movmsk.pd(<2 x double>) nounwind readnone




; llvm.x86.sse2.mul.sd: expects mulsd under the SSE run and the three-operand
; vmulsd under the KNL run.
define <2 x double> @test_x86_sse2_mul_sd(<2 x double> %a0, <2 x double> %a1) {
; SSE-LABEL: test_x86_sse2_mul_sd:
; SSE:       ## BB#0:
; SSE-NEXT:    mulsd %xmm1, %xmm0
; SSE-NEXT:    retl
;
; KNL-LABEL: test_x86_sse2_mul_sd:
; KNL:       ## BB#0:
; KNL-NEXT:    vmulsd %xmm1, %xmm0, %xmm0
; KNL-NEXT:    retl
  %res = call <2 x double> @llvm.x86.sse2.mul.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.mul.sd(<2 x double>, <2 x double>) nounwind readnone


; llvm.x86.sse2.packssdw.128 (2 x <4 x i32> -> <8 x i16>): expects packssdw
; (SSE) / the three-operand vpackssdw (KNL).
define <8 x i16> @test_x86_sse2_packssdw_128(<4 x i32> %a0, <4 x i32> %a1) {
; SSE-LABEL: test_x86_sse2_packssdw_128:
; SSE:       ## BB#0:
; SSE-NEXT:    packssdw %xmm1, %xmm0
; SSE-NEXT:    retl
;
; KNL-LABEL: test_x86_sse2_packssdw_128:
; KNL:       ## BB#0:
; KNL-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
; KNL-NEXT:    retl
  %res = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %a0, <4 x i32> %a1) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32>, <4 x i32>) nounwind readnone


; llvm.x86.sse2.packsswb.128 (2 x <8 x i16> -> <16 x i8>): expects packsswb
; (SSE) / the three-operand vpacksswb (KNL).
define <16 x i8> @test_x86_sse2_packsswb_128(<8 x i16> %a0, <8 x i16> %a1) {
; SSE-LABEL: test_x86_sse2_packsswb_128:
; SSE:       ## BB#0:
; SSE-NEXT:    packsswb %xmm1, %xmm0
; SSE-NEXT:    retl
;
; KNL-LABEL: test_x86_sse2_packsswb_128:
; KNL:       ## BB#0:
; KNL-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
; KNL-NEXT:    retl
  %res = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %a0, <8 x i16> %a1) ; <<16 x i8>> [#uses=1]
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16>, <8 x i16>) nounwind readnone


; llvm.x86.sse2.packuswb.128 (2 x <8 x i16> -> <16 x i8>): expects packuswb
; (SSE) / the three-operand vpackuswb (KNL).
define <16 x i8> @test_x86_sse2_packuswb_128(<8 x i16> %a0, <8 x i16> %a1) {
; SSE-LABEL: test_x86_sse2_packuswb_128:
; SSE:       ## BB#0:
; SSE-NEXT:    packuswb %xmm1, %xmm0
; SSE-NEXT:    retl
;
; KNL-LABEL: test_x86_sse2_packuswb_128:
; KNL:       ## BB#0:
; KNL-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
; KNL-NEXT:    retl
  %res = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %a0, <8 x i16> %a1) ; <<16 x i8>> [#uses=1]
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16>, <8 x i16>) nounwind readnone


; llvm.x86.sse2.padds.b on <16 x i8>: expects paddsb (SSE) / the
; three-operand vpaddsb (KNL).
define <16 x i8> @test_x86_sse2_padds_b(<16 x i8> %a0, <16 x i8> %a1) {
; SSE-LABEL: test_x86_sse2_padds_b:
; SSE:       ## BB#0:
; SSE-NEXT:    paddsb %xmm1, %xmm0
; SSE-NEXT:    retl
;
; KNL-LABEL: test_x86_sse2_padds_b:
; KNL:       ## BB#0:
; KNL-NEXT:    vpaddsb %xmm1, %xmm0, %xmm0
; KNL-NEXT:    retl
  %res = call <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8>, <16 x i8>) nounwind readnone


; llvm.x86.sse2.padds.w on <8 x i16>: expects paddsw (SSE) / the
; three-operand vpaddsw (KNL).
define <8 x i16> @test_x86_sse2_padds_w(<8 x i16> %a0, <8 x i16> %a1) {
; SSE-LABEL: test_x86_sse2_padds_w:
; SSE:       ## BB#0:
; SSE-NEXT:    paddsw %xmm1, %xmm0
; SSE-NEXT:    retl
;
; KNL-LABEL: test_x86_sse2_padds_w:
; KNL:       ## BB#0:
; KNL-NEXT:    vpaddsw %xmm1, %xmm0, %xmm0
; KNL-NEXT:    retl
  %res = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16>, <8 x i16>) nounwind readnone


; llvm.x86.sse2.paddus.b on <16 x i8>: expects paddusb (SSE) / the
; three-operand vpaddusb (KNL).
define <16 x i8> @test_x86_sse2_paddus_b(<16 x i8> %a0, <16 x i8> %a1) {
; SSE-LABEL: test_x86_sse2_paddus_b:
; SSE:       ## BB#0:
; SSE-NEXT:    paddusb %xmm1, %xmm0
; SSE-NEXT:    retl
;
; KNL-LABEL: test_x86_sse2_paddus_b:
; KNL:       ## BB#0:
; KNL-NEXT:    vpaddusb %xmm1, %xmm0, %xmm0
; KNL-NEXT:    retl
  %res = call <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8>, <16 x i8>) nounwind readnone


; llvm.x86.sse2.paddus.w on <8 x i16>: expects paddusw (SSE) / the
; three-operand vpaddusw (KNL).
define <8 x i16> @test_x86_sse2_paddus_w(<8 x i16> %a0, <8 x i16> %a1) {
; SSE-LABEL: test_x86_sse2_paddus_w:
; SSE:       ## BB#0:
; SSE-NEXT:    paddusw %xmm1, %xmm0
; SSE-NEXT:    retl
;
; KNL-LABEL: test_x86_sse2_paddus_w:
; KNL:       ## BB#0:
; KNL-NEXT:    vpaddusw %xmm1, %xmm0, %xmm0
; KNL-NEXT:    retl
  %res = call <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16>, <8 x i16>) nounwind readnone


; llvm.x86.sse2.pavg.b on <16 x i8>: expects pavgb (SSE) / the three-operand
; vpavgb (KNL).
define <16 x i8> @test_x86_sse2_pavg_b(<16 x i8> %a0, <16 x i8> %a1) {
; SSE-LABEL: test_x86_sse2_pavg_b:
; SSE:       ## BB#0:
; SSE-NEXT:    pavgb %xmm1, %xmm0
; SSE-NEXT:    retl
;
; KNL-LABEL: test_x86_sse2_pavg_b:
; KNL:       ## BB#0:
; KNL-NEXT:    vpavgb %xmm1, %xmm0, %xmm0
; KNL-NEXT:    retl
  %res = call <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8>, <16 x i8>) nounwind readnone


; llvm.x86.sse2.pavg.w on <8 x i16>: expects pavgw (SSE) / the three-operand
; vpavgw (KNL).
define <8 x i16> @test_x86_sse2_pavg_w(<8 x i16> %a0, <8 x i16> %a1) {
; SSE-LABEL: test_x86_sse2_pavg_w:
; SSE:       ## BB#0:
; SSE-NEXT:    pavgw %xmm1, %xmm0
; SSE-NEXT:    retl
;
; KNL-LABEL: test_x86_sse2_pavg_w:
; KNL:       ## BB#0:
; KNL-NEXT:    vpavgw %xmm1, %xmm0, %xmm0
; KNL-NEXT:    retl
  %res = call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16>, <8 x i16>) nounwind readnone


; llvm.x86.sse2.pmadd.wd (2 x <8 x i16> -> <4 x i32>): expects pmaddwd (SSE) /
; the three-operand vpmaddwd (KNL).
define <4 x i32> @test_x86_sse2_pmadd_wd(<8 x i16> %a0, <8 x i16> %a1) {
; SSE-LABEL: test_x86_sse2_pmadd_wd:
; SSE:       ## BB#0:
; SSE-NEXT:    pmaddwd %xmm1, %xmm0
; SSE-NEXT:    retl
;
; KNL-LABEL: test_x86_sse2_pmadd_wd:
; KNL:       ## BB#0:
; KNL-NEXT:    vpmaddwd %xmm1, %xmm0, %xmm0
; KNL-NEXT:    retl
  %res = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %a0, <8 x i16> %a1) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16>, <8 x i16>) nounwind readnone


; llvm.x86.sse2.pmaxs.w on <8 x i16>: expects pmaxsw (SSE) / the
; three-operand vpmaxsw (KNL).
define <8 x i16> @test_x86_sse2_pmaxs_w(<8 x i16> %a0, <8 x i16> %a1) {
; SSE-LABEL: test_x86_sse2_pmaxs_w:
; SSE:       ## BB#0:
; SSE-NEXT:    pmaxsw %xmm1, %xmm0
; SSE-NEXT:    retl
;
; KNL-LABEL: test_x86_sse2_pmaxs_w:
; KNL:       ## BB#0:
; KNL-NEXT:    vpmaxsw %xmm1, %xmm0, %xmm0
; KNL-NEXT:    retl
  %res = call <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16>, <8 x i16>) nounwind readnone


; llvm.x86.sse2.pmaxu.b on <16 x i8>: expects pmaxub (SSE) / the
; three-operand vpmaxub (KNL).
define <16 x i8> @test_x86_sse2_pmaxu_b(<16 x i8> %a0, <16 x i8> %a1) {
; SSE-LABEL: test_x86_sse2_pmaxu_b:
; SSE:       ## BB#0:
; SSE-NEXT:    pmaxub %xmm1, %xmm0
; SSE-NEXT:    retl
;
; KNL-LABEL: test_x86_sse2_pmaxu_b:
; KNL:       ## BB#0:
; KNL-NEXT:    vpmaxub %xmm1, %xmm0, %xmm0
; KNL-NEXT:    retl
  %res = call <16 x i8> @llvm.x86.sse2.pmaxu.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse2.pmaxu.b(<16 x i8>, <16 x i8>) nounwind readnone


; llvm.x86.sse2.pmins.w on <8 x i16>: expects pminsw (SSE) / the
; three-operand vpminsw (KNL).
define <8 x i16> @test_x86_sse2_pmins_w(<8 x i16> %a0, <8 x i16> %a1) {
; SSE-LABEL: test_x86_sse2_pmins_w:
; SSE:       ## BB#0:
; SSE-NEXT:    pminsw %xmm1, %xmm0
; SSE-NEXT:    retl
;
; KNL-LABEL: test_x86_sse2_pmins_w:
; KNL:       ## BB#0:
; KNL-NEXT:    vpminsw %xmm1, %xmm0, %xmm0
; KNL-NEXT:    retl
  %res = call <8 x i16> @llvm.x86.sse2.pmins.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.pmins.w(<8 x i16>, <8 x i16>) nounwind readnone


; llvm.x86.sse2.pminu.b on <16 x i8>: expects pminub (SSE) / the
; three-operand vpminub (KNL).
define <16 x i8> @test_x86_sse2_pminu_b(<16 x i8> %a0, <16 x i8> %a1) {
; SSE-LABEL: test_x86_sse2_pminu_b:
; SSE:       ## BB#0:
; SSE-NEXT:    pminub %xmm1, %xmm0
; SSE-NEXT:    retl
;
; KNL-LABEL: test_x86_sse2_pminu_b:
; KNL:       ## BB#0:
; KNL-NEXT:    vpminub %xmm1, %xmm0, %xmm0
; KNL-NEXT:    retl
  %res = call <16 x i8> @llvm.x86.sse2.pminu.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse2.pminu.b(<16 x i8>, <16 x i8>) nounwind readnone


; llvm.x86.sse2.pmovmskb.128 (<16 x i8> -> i32): expects pmovmskb (SSE) /
; vpmovmskb (KNL) writing the result to %eax.
define i32 @test_x86_sse2_pmovmskb_128(<16 x i8> %a0) {
; SSE-LABEL: test_x86_sse2_pmovmskb_128:
; SSE:       ## BB#0:
; SSE-NEXT:    pmovmskb %xmm0, %eax
; SSE-NEXT:    retl
;
; KNL-LABEL: test_x86_sse2_pmovmskb_128:
; KNL:       ## BB#0:
; KNL-NEXT:    vpmovmskb %xmm0, %eax
; KNL-NEXT:    retl
  %res = call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %a0) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8>) nounwind readnone


; llvm.x86.sse2.pmulh.w on <8 x i16>: expects pmulhw (SSE) / the
; three-operand vpmulhw (KNL).
define <8 x i16> @test_x86_sse2_pmulh_w(<8 x i16> %a0, <8 x i16> %a1) {
; SSE-LABEL: test_x86_sse2_pmulh_w:
; SSE:       ## BB#0:
; SSE-NEXT:    pmulhw %xmm1, %xmm0
; SSE-NEXT:    retl
;
; KNL-LABEL: test_x86_sse2_pmulh_w:
; KNL:       ## BB#0:
; KNL-NEXT:    vpmulhw %xmm1, %xmm0, %xmm0
; KNL-NEXT:    retl
  %res = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16>, <8 x i16>) nounwind readnone


; llvm.x86.sse2.pmulhu.w on <8 x i16>: expects pmulhuw (SSE) / the
; three-operand vpmulhuw (KNL).
define <8 x i16> @test_x86_sse2_pmulhu_w(<8 x i16> %a0, <8 x i16> %a1) {
; SSE-LABEL: test_x86_sse2_pmulhu_w:
; SSE:       ## BB#0:
; SSE-NEXT:    pmulhuw %xmm1, %xmm0
; SSE-NEXT:    retl
;
; KNL-LABEL: test_x86_sse2_pmulhu_w:
; KNL:       ## BB#0:
; KNL-NEXT:    vpmulhuw %xmm1, %xmm0, %xmm0
; KNL-NEXT:    retl
  %res = call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16>, <8 x i16>) nounwind readnone


; llvm.x86.sse2.pmulu.dq (2 x <4 x i32> -> <2 x i64>): expects pmuludq (SSE) /
; the three-operand vpmuludq (KNL).
define <2 x i64> @test_x86_sse2_pmulu_dq(<4 x i32> %a0, <4 x i32> %a1) {
; SSE-LABEL: test_x86_sse2_pmulu_dq:
; SSE:       ## BB#0:
; SSE-NEXT:    pmuludq %xmm1, %xmm0
; SSE-NEXT:    retl
;
; KNL-LABEL: test_x86_sse2_pmulu_dq:
; KNL:       ## BB#0:
; KNL-NEXT:    vpmuludq %xmm1, %xmm0, %xmm0
; KNL-NEXT:    retl
  %res = call <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32> %a0, <4 x i32> %a1) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32>, <4 x i32>) nounwind readnone


; llvm.x86.sse2.psad.bw (2 x <16 x i8> -> <2 x i64>): expects psadbw (SSE) /
; the three-operand vpsadbw (KNL).
define <2 x i64> @test_x86_sse2_psad_bw(<16 x i8> %a0, <16 x i8> %a1) {
; SSE-LABEL: test_x86_sse2_psad_bw:
; SSE:       ## BB#0:
; SSE-NEXT:    psadbw %xmm1, %xmm0
; SSE-NEXT:    retl
;
; KNL-LABEL: test_x86_sse2_psad_bw:
; KNL:       ## BB#0:
; KNL-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; KNL-NEXT:    retl
  %res = call <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8> %a0, <16 x i8> %a1) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8>, <16 x i8>) nounwind readnone


; llvm.x86.sse2.psll.d with the shift count in a vector register: expects
; pslld (SSE) / the three-operand vpslld (KNL).
define <4 x i32> @test_x86_sse2_psll_d(<4 x i32> %a0, <4 x i32> %a1) {
; SSE-LABEL: test_x86_sse2_psll_d:
; SSE:       ## BB#0:
; SSE-NEXT:    pslld %xmm1, %xmm0
; SSE-NEXT:    retl
;
; KNL-LABEL: test_x86_sse2_psll_d:
; KNL:       ## BB#0:
; KNL-NEXT:    vpslld %xmm1, %xmm0, %xmm0
; KNL-NEXT:    retl
  %res = call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32>, <4 x i32>) nounwind readnone


; llvm.x86.sse2.psll.q with the shift count in a vector register: expects
; psllq (SSE) / the three-operand vpsllq (KNL).
define <2 x i64> @test_x86_sse2_psll_q(<2 x i64> %a0, <2 x i64> %a1) {
; SSE-LABEL: test_x86_sse2_psll_q:
; SSE:       ## BB#0:
; SSE-NEXT:    psllq %xmm1, %xmm0
; SSE-NEXT:    retl
;
; KNL-LABEL: test_x86_sse2_psll_q:
; KNL:       ## BB#0:
; KNL-NEXT:    vpsllq %xmm1, %xmm0, %xmm0
; KNL-NEXT:    retl
  %res = call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64>, <2 x i64>) nounwind readnone


; llvm.x86.sse2.psll.w with the shift count in a vector register: expects
; psllw (SSE) / the three-operand vpsllw (KNL).
define <8 x i16> @test_x86_sse2_psll_w(<8 x i16> %a0, <8 x i16> %a1) {
; SSE-LABEL: test_x86_sse2_psll_w:
; SSE:       ## BB#0:
; SSE-NEXT:    psllw %xmm1, %xmm0
; SSE-NEXT:    retl
;
; KNL-LABEL: test_x86_sse2_psll_w:
; KNL:       ## BB#0:
; KNL-NEXT:    vpsllw %xmm1, %xmm0, %xmm0
; KNL-NEXT:    retl
  %res = call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16>, <8 x i16>) nounwind readnone


; llvm.x86.sse2.pslli.d with constant count 7: expects the immediate form
; pslld $7 (SSE) / vpslld $7 (KNL).
define <4 x i32> @test_x86_sse2_pslli_d(<4 x i32> %a0) {
; SSE-LABEL: test_x86_sse2_pslli_d:
; SSE:       ## BB#0:
; SSE-NEXT:    pslld $7, %xmm0
; SSE-NEXT:    retl
;
; KNL-LABEL: test_x86_sse2_pslli_d:
; KNL:       ## BB#0:
; KNL-NEXT:    vpslld $7, %xmm0, %xmm0
; KNL-NEXT:    retl
  %res = call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %a0, i32 7) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32>, i32) nounwind readnone


; llvm.x86.sse2.pslli.q with constant count 7: expects the immediate form
; psllq $7 (SSE) / vpsllq $7 (KNL).
define <2 x i64> @test_x86_sse2_pslli_q(<2 x i64> %a0) {
; SSE-LABEL: test_x86_sse2_pslli_q:
; SSE:       ## BB#0:
; SSE-NEXT:    psllq $7, %xmm0
; SSE-NEXT:    retl
;
; KNL-LABEL: test_x86_sse2_pslli_q:
; KNL:       ## BB#0:
; KNL-NEXT:    vpsllq $7, %xmm0, %xmm0
; KNL-NEXT:    retl
  %res = call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64>, i32) nounwind readnone


; llvm.x86.sse2.pslli.w with constant count 7: expects the immediate form
; psllw $7 (SSE) / vpsllw $7 (KNL).
define <8 x i16> @test_x86_sse2_pslli_w(<8 x i16> %a0) {
; SSE-LABEL: test_x86_sse2_pslli_w:
; SSE:       ## BB#0:
; SSE-NEXT:    psllw $7, %xmm0
; SSE-NEXT:    retl
;
; KNL-LABEL: test_x86_sse2_pslli_w:
; KNL:       ## BB#0:
; KNL-NEXT:    vpsllw $7, %xmm0, %xmm0
; KNL-NEXT:    retl
  %res = call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %a0, i32 7) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16>, i32) nounwind readnone


; llvm.x86.sse2.psra.d with the shift count in a vector register: expects
; psrad (SSE) / the three-operand vpsrad (KNL).
define <4 x i32> @test_x86_sse2_psra_d(<4 x i32> %a0, <4 x i32> %a1) {
; SSE-LABEL: test_x86_sse2_psra_d:
; SSE:       ## BB#0:
; SSE-NEXT:    psrad %xmm1, %xmm0
; SSE-NEXT:    retl
;
; KNL-LABEL: test_x86_sse2_psra_d:
; KNL:       ## BB#0:
; KNL-NEXT:    vpsrad %xmm1, %xmm0, %xmm0
; KNL-NEXT:    retl
  %res = call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32>, <4 x i32>) nounwind readnone


; llvm.x86.sse2.psra.w with the shift count in a vector register: expects
; psraw (SSE) / the three-operand vpsraw (KNL).
define <8 x i16> @test_x86_sse2_psra_w(<8 x i16> %a0, <8 x i16> %a1) {
; SSE-LABEL: test_x86_sse2_psra_w:
; SSE:       ## BB#0:
; SSE-NEXT:    psraw %xmm1, %xmm0
; SSE-NEXT:    retl
;
; KNL-LABEL: test_x86_sse2_psra_w:
; KNL:       ## BB#0:
; KNL-NEXT:    vpsraw %xmm1, %xmm0, %xmm0
; KNL-NEXT:    retl
  %res = call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16>, <8 x i16>) nounwind readnone


; llvm.x86.sse2.psrai.d with constant count 7: expects the immediate form
; psrad $7 (SSE) / vpsrad $7 (KNL).
define <4 x i32> @test_x86_sse2_psrai_d(<4 x i32> %a0) {
; SSE-LABEL: test_x86_sse2_psrai_d:
; SSE:       ## BB#0:
; SSE-NEXT:    psrad $7, %xmm0
; SSE-NEXT:    retl
;
; KNL-LABEL: test_x86_sse2_psrai_d:
; KNL:       ## BB#0:
; KNL-NEXT:    vpsrad $7, %xmm0, %xmm0
; KNL-NEXT:    retl
  %res = call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %a0, i32 7) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32>, i32) nounwind readnone


; llvm.x86.sse2.psrai.w with constant count 7: expects the immediate form
; psraw $7 (SSE) / vpsraw $7 (KNL).
define <8 x i16> @test_x86_sse2_psrai_w(<8 x i16> %a0) {
; SSE-LABEL: test_x86_sse2_psrai_w:
; SSE:       ## BB#0:
; SSE-NEXT:    psraw $7, %xmm0
; SSE-NEXT:    retl
;
; KNL-LABEL: test_x86_sse2_psrai_w:
; KNL:       ## BB#0:
; KNL-NEXT:    vpsraw $7, %xmm0, %xmm0
; KNL-NEXT:    retl
  %res = call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %a0, i32 7) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16>, i32) nounwind readnone


; llvm.x86.sse2.psrl.d with the shift count in a vector register: expects
; psrld (SSE) / the three-operand vpsrld (KNL).
define <4 x i32> @test_x86_sse2_psrl_d(<4 x i32> %a0, <4 x i32> %a1) {
; SSE-LABEL: test_x86_sse2_psrl_d:
; SSE:       ## BB#0:
; SSE-NEXT:    psrld %xmm1, %xmm0
; SSE-NEXT:    retl
;
; KNL-LABEL: test_x86_sse2_psrl_d:
; KNL:       ## BB#0:
; KNL-NEXT:    vpsrld %xmm1, %xmm0, %xmm0
; KNL-NEXT:    retl
  %res = call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32>, <4 x i32>) nounwind readnone


; llvm.x86.sse2.psrl.q with the shift count in a vector register: expects
; psrlq (SSE) / the three-operand vpsrlq (KNL).
define <2 x i64> @test_x86_sse2_psrl_q(<2 x i64> %a0, <2 x i64> %a1) {
; SSE-LABEL: test_x86_sse2_psrl_q:
; SSE:       ## BB#0:
; SSE-NEXT:    psrlq %xmm1, %xmm0
; SSE-NEXT:    retl
;
; KNL-LABEL: test_x86_sse2_psrl_q:
; KNL:       ## BB#0:
; KNL-NEXT:    vpsrlq %xmm1, %xmm0, %xmm0
; KNL-NEXT:    retl
  %res = call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64>, <2 x i64>) nounwind readnone


; Lowering test for @llvm.x86.sse2.psrl.w, whose count is supplied as a second
; <8 x i16> vector operand (%a1) rather than an immediate.
; Expected instruction: "psrlw %xmm1, %xmm0" for the -mattr=-avx,+sse2 run and
; the three-operand "vpsrlw" for the -mcpu=knl run.
define <8 x i16> @test_x86_sse2_psrl_w(<8 x i16> %a0, <8 x i16> %a1) {
; SSE-LABEL: test_x86_sse2_psrl_w:
; SSE:       ## BB#0:
; SSE-NEXT:    psrlw %xmm1, %xmm0
; SSE-NEXT:    retl
;
; KNL-LABEL: test_x86_sse2_psrl_w:
; KNL:       ## BB#0:
; KNL-NEXT:    vpsrlw %xmm1, %xmm0, %xmm0
; KNL-NEXT:    retl
  %res = call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16>, <8 x i16>) nounwind readnone


; Lowering test for @llvm.x86.sse2.psrli.d applied to %a0 with a constant i32 count of 7.
; Expected instruction: "psrld $7" for the -mattr=-avx,+sse2 run and
; "vpsrld $7" for the -mcpu=knl run.
define <4 x i32> @test_x86_sse2_psrli_d(<4 x i32> %a0) {
; SSE-LABEL: test_x86_sse2_psrli_d:
; SSE:       ## BB#0:
; SSE-NEXT:    psrld $7, %xmm0
; SSE-NEXT:    retl
;
; KNL-LABEL: test_x86_sse2_psrli_d:
; KNL:       ## BB#0:
; KNL-NEXT:    vpsrld $7, %xmm0, %xmm0
; KNL-NEXT:    retl
  %res = call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %a0, i32 7) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32>, i32) nounwind readnone


; Lowering test for @llvm.x86.sse2.psrli.q applied to %a0 with a constant i32 count of 7.
; Expected instruction: "psrlq $7" for the -mattr=-avx,+sse2 run and
; "vpsrlq $7" for the -mcpu=knl run.
define <2 x i64> @test_x86_sse2_psrli_q(<2 x i64> %a0) {
; SSE-LABEL: test_x86_sse2_psrli_q:
; SSE:       ## BB#0:
; SSE-NEXT:    psrlq $7, %xmm0
; SSE-NEXT:    retl
;
; KNL-LABEL: test_x86_sse2_psrli_q:
; KNL:       ## BB#0:
; KNL-NEXT:    vpsrlq $7, %xmm0, %xmm0
; KNL-NEXT:    retl
  %res = call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64>, i32) nounwind readnone


; Lowering test for @llvm.x86.sse2.psrli.w applied to %a0 with a constant i32 count of 7.
; Expected instruction: "psrlw $7" for the -mattr=-avx,+sse2 run and
; "vpsrlw $7" for the -mcpu=knl run.
define <8 x i16> @test_x86_sse2_psrli_w(<8 x i16> %a0) {
; SSE-LABEL: test_x86_sse2_psrli_w:
; SSE:       ## BB#0:
; SSE-NEXT:    psrlw $7, %xmm0
; SSE-NEXT:    retl
;
; KNL-LABEL: test_x86_sse2_psrli_w:
; KNL:       ## BB#0:
; KNL-NEXT:    vpsrlw $7, %xmm0, %xmm0
; KNL-NEXT:    retl
  %res = call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %a0, i32 7) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16>, i32) nounwind readnone


; Lowering test for @llvm.x86.sse2.psubs.b on two <16 x i8> operands.
; Expected instruction: "psubsb %xmm1, %xmm0" for the -mattr=-avx,+sse2 run and
; the three-operand "vpsubsb" for the -mcpu=knl run.
define <16 x i8> @test_x86_sse2_psubs_b(<16 x i8> %a0, <16 x i8> %a1) {
; SSE-LABEL: test_x86_sse2_psubs_b:
; SSE:       ## BB#0:
; SSE-NEXT:    psubsb %xmm1, %xmm0
; SSE-NEXT:    retl
;
; KNL-LABEL: test_x86_sse2_psubs_b:
; KNL:       ## BB#0:
; KNL-NEXT:    vpsubsb %xmm1, %xmm0, %xmm0
; KNL-NEXT:    retl
  %res = call <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8>, <16 x i8>) nounwind readnone


; Lowering test for @llvm.x86.sse2.psubs.w on two <8 x i16> operands.
; Expected instruction: "psubsw %xmm1, %xmm0" for the -mattr=-avx,+sse2 run and
; the three-operand "vpsubsw" for the -mcpu=knl run.
define <8 x i16> @test_x86_sse2_psubs_w(<8 x i16> %a0, <8 x i16> %a1) {
; SSE-LABEL: test_x86_sse2_psubs_w:
; SSE:       ## BB#0:
; SSE-NEXT:    psubsw %xmm1, %xmm0
; SSE-NEXT:    retl
;
; KNL-LABEL: test_x86_sse2_psubs_w:
; KNL:       ## BB#0:
; KNL-NEXT:    vpsubsw %xmm1, %xmm0, %xmm0
; KNL-NEXT:    retl
  %res = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16>, <8 x i16>) nounwind readnone


; Lowering test for @llvm.x86.sse2.psubus.b on two <16 x i8> operands.
; Expected instruction: "psubusb %xmm1, %xmm0" for the -mattr=-avx,+sse2 run and
; the three-operand "vpsubusb" for the -mcpu=knl run.
define <16 x i8> @test_x86_sse2_psubus_b(<16 x i8> %a0, <16 x i8> %a1) {
; SSE-LABEL: test_x86_sse2_psubus_b:
; SSE:       ## BB#0:
; SSE-NEXT:    psubusb %xmm1, %xmm0
; SSE-NEXT:    retl
;
; KNL-LABEL: test_x86_sse2_psubus_b:
; KNL:       ## BB#0:
; KNL-NEXT:    vpsubusb %xmm1, %xmm0, %xmm0
; KNL-NEXT:    retl
  %res = call <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8>, <16 x i8>) nounwind readnone


; Lowering test for @llvm.x86.sse2.psubus.w on two <8 x i16> operands.
; Expected instruction: "psubusw %xmm1, %xmm0" for the -mattr=-avx,+sse2 run and
; the three-operand "vpsubusw" for the -mcpu=knl run.
define <8 x i16> @test_x86_sse2_psubus_w(<8 x i16> %a0, <8 x i16> %a1) {
; SSE-LABEL: test_x86_sse2_psubus_w:
; SSE:       ## BB#0:
; SSE-NEXT:    psubusw %xmm1, %xmm0
; SSE-NEXT:    retl
;
; KNL-LABEL: test_x86_sse2_psubus_w:
; KNL:       ## BB#0:
; KNL-NEXT:    vpsubusw %xmm1, %xmm0, %xmm0
; KNL-NEXT:    retl
  %res = call <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16>, <8 x i16>) nounwind readnone


; Lowering test for the single-operand intrinsic @llvm.x86.sse2.sqrt.pd.
; Expected instruction: "sqrtpd %xmm0, %xmm0" for the -mattr=-avx,+sse2 run and
; "vsqrtpd %xmm0, %xmm0" (still two operands) for the -mcpu=knl run.
define <2 x double> @test_x86_sse2_sqrt_pd(<2 x double> %a0) {
; SSE-LABEL: test_x86_sse2_sqrt_pd:
; SSE:       ## BB#0:
; SSE-NEXT:    sqrtpd %xmm0, %xmm0
; SSE-NEXT:    retl
;
; KNL-LABEL: test_x86_sse2_sqrt_pd:
; KNL:       ## BB#0:
; KNL-NEXT:    vsqrtpd %xmm0, %xmm0
; KNL-NEXT:    retl
  %res = call <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double> %a0) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double>) nounwind readnone


; Lowering test for the single-operand intrinsic @llvm.x86.sse2.sqrt.sd.
; Expected instruction: "sqrtsd %xmm0, %xmm0" for the -mattr=-avx,+sse2 run; the
; -mcpu=knl run expects the three-operand form "vsqrtsd %xmm0, %xmm0, %xmm0",
; with %xmm0 used for both sources.
define <2 x double> @test_x86_sse2_sqrt_sd(<2 x double> %a0) {
; SSE-LABEL: test_x86_sse2_sqrt_sd:
; SSE:       ## BB#0:
; SSE-NEXT:    sqrtsd %xmm0, %xmm0
; SSE-NEXT:    retl
;
; KNL-LABEL: test_x86_sse2_sqrt_sd:
; KNL:       ## BB#0:
; KNL-NEXT:    vsqrtsd %xmm0, %xmm0, %xmm0
; KNL-NEXT:    retl
  %res = call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %a0) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double>) nounwind readnone


; Lowering test for @llvm.x86.sse2.sub.sd on two <2 x double> operands.
; Expected instruction: "subsd %xmm1, %xmm0" for the -mattr=-avx,+sse2 run and
; the three-operand "vsubsd" for the -mcpu=knl run.
define <2 x double> @test_x86_sse2_sub_sd(<2 x double> %a0, <2 x double> %a1) {
; SSE-LABEL: test_x86_sse2_sub_sd:
; SSE:       ## BB#0:
; SSE-NEXT:    subsd %xmm1, %xmm0
; SSE-NEXT:    retl
;
; KNL-LABEL: test_x86_sse2_sub_sd:
; KNL:       ## BB#0:
; KNL-NEXT:    vsubsd %xmm1, %xmm0, %xmm0
; KNL-NEXT:    retl
  %res = call <2 x double> @llvm.x86.sse2.sub.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.sub.sd(<2 x double>, <2 x double>) nounwind readnone


; Lowering test for @llvm.x86.sse2.ucomieq.sd, which returns an i32.
; Both runs expect a (v)ucomisd compare followed by setnp and sete whose results
; are ANDed and zero-extended into %eax, i.e. the result is 1 only when the
; zero flag is set and the parity flag is clear.
; NOTE(review): the parity test presumably excludes the unordered (NaN) case,
; which per the x86 ISA sets PF -- confirm against the instruction reference.
define i32 @test_x86_sse2_ucomieq_sd(<2 x double> %a0, <2 x double> %a1) {
; SSE-LABEL: test_x86_sse2_ucomieq_sd:
; SSE:       ## BB#0:
; SSE-NEXT:    ucomisd %xmm1, %xmm0
; SSE-NEXT:    setnp %al
; SSE-NEXT:    sete %cl
; SSE-NEXT:    andb %al, %cl
; SSE-NEXT:    movzbl %cl, %eax
; SSE-NEXT:    retl
;
; KNL-LABEL: test_x86_sse2_ucomieq_sd:
; KNL:       ## BB#0:
; KNL-NEXT:    vucomisd %xmm1, %xmm0
; KNL-NEXT:    setnp %al
; KNL-NEXT:    sete %cl
; KNL-NEXT:    andb %al, %cl
; KNL-NEXT:    movzbl %cl, %eax
; KNL-NEXT:    retl
  %res = call i32 @llvm.x86.sse2.ucomieq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomieq.sd(<2 x double>, <2 x double>) nounwind readnone


; Lowering test for @llvm.x86.sse2.ucomige.sd, which returns an i32.
; Both runs expect %eax to be zeroed first (xorl), then a (v)ucomisd compare of
; %xmm1 against %xmm0, then setae writing the low byte of the result.
define i32 @test_x86_sse2_ucomige_sd(<2 x double> %a0, <2 x double> %a1) {
; SSE-LABEL: test_x86_sse2_ucomige_sd:
; SSE:       ## BB#0:
; SSE-NEXT:    xorl %eax, %eax
; SSE-NEXT:    ucomisd %xmm1, %xmm0
; SSE-NEXT:    setae %al
; SSE-NEXT:    retl
;
; KNL-LABEL: test_x86_sse2_ucomige_sd:
; KNL:       ## BB#0:
; KNL-NEXT:    xorl %eax, %eax
; KNL-NEXT:    vucomisd %xmm1, %xmm0
; KNL-NEXT:    setae %al
; KNL-NEXT:    retl
  %res = call i32 @llvm.x86.sse2.ucomige.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomige.sd(<2 x double>, <2 x double>) nounwind readnone


; Lowering test for @llvm.x86.sse2.ucomigt.sd, which returns an i32.
; Both runs expect %eax to be zeroed first (xorl), then a (v)ucomisd compare of
; %xmm1 against %xmm0, then seta writing the low byte of the result.
define i32 @test_x86_sse2_ucomigt_sd(<2 x double> %a0, <2 x double> %a1) {
; SSE-LABEL: test_x86_sse2_ucomigt_sd:
; SSE:       ## BB#0:
; SSE-NEXT:    xorl %eax, %eax
; SSE-NEXT:    ucomisd %xmm1, %xmm0
; SSE-NEXT:    seta %al
; SSE-NEXT:    retl
;
; KNL-LABEL: test_x86_sse2_ucomigt_sd:
; KNL:       ## BB#0:
; KNL-NEXT:    xorl %eax, %eax
; KNL-NEXT:    vucomisd %xmm1, %xmm0
; KNL-NEXT:    seta %al
; KNL-NEXT:    retl
  %res = call i32 @llvm.x86.sse2.ucomigt.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomigt.sd(<2 x double>, <2 x double>) nounwind readnone


; Lowering test for @llvm.x86.sse2.ucomile.sd, which returns an i32.
; The expected code uses the same setae condition as the ucomige test but with
; the (v)ucomisd operands swapped (%xmm0, %xmm1): "a0 <= a1" is checked as
; "a1 >= a0".
define i32 @test_x86_sse2_ucomile_sd(<2 x double> %a0, <2 x double> %a1) {
; SSE-LABEL: test_x86_sse2_ucomile_sd:
; SSE:       ## BB#0:
; SSE-NEXT:    xorl %eax, %eax
; SSE-NEXT:    ucomisd %xmm0, %xmm1
; SSE-NEXT:    setae %al
; SSE-NEXT:    retl
;
; KNL-LABEL: test_x86_sse2_ucomile_sd:
; KNL:       ## BB#0:
; KNL-NEXT:    xorl %eax, %eax
; KNL-NEXT:    vucomisd %xmm0, %xmm1
; KNL-NEXT:    setae %al
; KNL-NEXT:    retl
  %res = call i32 @llvm.x86.sse2.ucomile.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomile.sd(<2 x double>, <2 x double>) nounwind readnone


; Lowering test for @llvm.x86.sse2.ucomilt.sd, which returns an i32.
; The expected code uses the same seta condition as the ucomigt test but with
; the (v)ucomisd operands swapped (%xmm0, %xmm1): "a0 < a1" is checked as
; "a1 > a0".
define i32 @test_x86_sse2_ucomilt_sd(<2 x double> %a0, <2 x double> %a1) {
; SSE-LABEL: test_x86_sse2_ucomilt_sd:
; SSE:       ## BB#0:
; SSE-NEXT:    xorl %eax, %eax
; SSE-NEXT:    ucomisd %xmm0, %xmm1
; SSE-NEXT:    seta %al
; SSE-NEXT:    retl
;
; KNL-LABEL: test_x86_sse2_ucomilt_sd:
; KNL:       ## BB#0:
; KNL-NEXT:    xorl %eax, %eax
; KNL-NEXT:    vucomisd %xmm0, %xmm1
; KNL-NEXT:    seta %al
; KNL-NEXT:    retl
  %res = call i32 @llvm.x86.sse2.ucomilt.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomilt.sd(<2 x double>, <2 x double>) nounwind readnone


; Lowering test for @llvm.x86.sse2.ucomineq.sd, which returns an i32.
; Both runs expect a (v)ucomisd compare followed by setp and setne whose results
; are ORed and zero-extended into %eax, i.e. the result is 1 when either the
; parity flag is set or the zero flag is clear.
; NOTE(review): the parity test presumably makes the unordered (NaN) case count
; as "not equal", since per the x86 ISA it sets PF -- confirm against the
; instruction reference.
define i32 @test_x86_sse2_ucomineq_sd(<2 x double> %a0, <2 x double> %a1) {
; SSE-LABEL: test_x86_sse2_ucomineq_sd:
; SSE:       ## BB#0:
; SSE-NEXT:    ucomisd %xmm1, %xmm0
; SSE-NEXT:    setp %al
; SSE-NEXT:    setne %cl
; SSE-NEXT:    orb %al, %cl
; SSE-NEXT:    movzbl %cl, %eax
; SSE-NEXT:    retl
;
; KNL-LABEL: test_x86_sse2_ucomineq_sd:
; KNL:       ## BB#0:
; KNL-NEXT:    vucomisd %xmm1, %xmm0
; KNL-NEXT:    setp %al
; KNL-NEXT:    setne %cl
; KNL-NEXT:    orb %al, %cl
; KNL-NEXT:    movzbl %cl, %eax
; KNL-NEXT:    retl
  %res = call i32 @llvm.x86.sse2.ucomineq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomineq.sd(<2 x double>, <2 x double>) nounwind readnone

; Lowering test for @llvm.x86.sse2.pause, which takes no operands and returns void.
; Unlike the other intrinsics in this file it is declared only "nounwind"
; (no "readnone"), and it is invoked as a tail call.
; Both runs expect the same plain "pause" instruction; the -mcpu=knl run does
; not get a v-prefixed form.
define void @test_x86_sse2_pause() {
; SSE-LABEL: test_x86_sse2_pause:
; SSE:       ## BB#0:
; SSE-NEXT:    pause
; SSE-NEXT:    retl
;
; KNL-LABEL: test_x86_sse2_pause:
; KNL:       ## BB#0:
; KNL-NEXT:    pause
; KNL-NEXT:    retl
  tail call void @llvm.x86.sse2.pause()
  ret void
}
declare void @llvm.x86.sse2.pause() nounwind
