master
  1/*===--------------- sha512intrin.h - SHA512 intrinsics -----------------===
  2 *
  3 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4 * See https://llvm.org/LICENSE.txt for license information.
  5 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6 *
  7 *===-----------------------------------------------------------------------===
  8 */
  9
 10#ifndef __IMMINTRIN_H
 11#error "Never use <sha512intrin.h> directly; include <immintrin.h> instead."
 12#endif // __IMMINTRIN_H
 13
 14#ifndef __SHA512INTRIN_H
 15#define __SHA512INTRIN_H
 16
 17#define __DEFAULT_FN_ATTRS256                                                  \
 18  __attribute__((__always_inline__, __nodebug__, __target__("sha512"),         \
 19                 __min_vector_width__(256)))
 20
/// This intrinsic is one of the two SHA512 message scheduling instructions.
 22///    The intrinsic performs an intermediate calculation for the next four
 23///    SHA512 message qwords. The calculated results are stored in \a dst.
 24///
 25/// \headerfile <immintrin.h>
 26///
 27/// \code
 28/// __m256i _mm256_sha512msg1_epi64(__m256i __A, __m128i __B)
 29/// \endcode
 30///
 31/// This intrinsic corresponds to the \c VSHA512MSG1 instruction.
 32///
 33/// \param __A
 34///    A 256-bit vector of [4 x long long].
 35/// \param __B
 36///    A 128-bit vector of [2 x long long].
 37/// \returns
 38///    A 256-bit vector of [4 x long long].
 39///
 40/// \code{.operation}
 41/// DEFINE ROR64(qword, n) {
 42/// 	count := n % 64
 43/// 	dest := (qword >> count) | (qword << (64 - count))
 44/// 	RETURN dest
 45/// }
 46/// DEFINE SHR64(qword, n) {
 47/// 	RETURN qword >> n
 48/// }
/// DEFINE s0(qword) {
 50/// 	RETURN ROR64(qword,1) ^ ROR64(qword, 8) ^ SHR64(qword, 7)
 51/// }
 52/// W[4] := __B.qword[0]
 53/// W[3] := __A.qword[3]
 54/// W[2] := __A.qword[2]
 55/// W[1] := __A.qword[1]
 56/// W[0] := __A.qword[0]
 57/// dst.qword[3] := W[3] + s0(W[4])
 58/// dst.qword[2] := W[2] + s0(W[3])
 59/// dst.qword[1] := W[1] + s0(W[2])
 60/// dst.qword[0] := W[0] + s0(W[1])
 61/// dst[MAX:256] := 0
 62/// \endcode
 63static __inline__ __m256i __DEFAULT_FN_ATTRS256
 64_mm256_sha512msg1_epi64(__m256i __A, __m128i __B) {
 65  return (__m256i)__builtin_ia32_vsha512msg1((__v4du)__A, (__v2du)__B);
 66}
 67
/// This intrinsic is one of the two SHA512 message scheduling instructions.
 69///    The intrinsic performs the final calculation for the next four SHA512
 70///    message qwords. The calculated results are stored in \a dst.
 71///
 72/// \headerfile <immintrin.h>
 73///
 74/// \code
 75/// __m256i _mm256_sha512msg2_epi64(__m256i __A, __m256i __B)
 76/// \endcode
 77///
 78/// This intrinsic corresponds to the \c VSHA512MSG2 instruction.
 79///
 80/// \param __A
 81///    A 256-bit vector of [4 x long long].
 82/// \param __B
 83///    A 256-bit vector of [4 x long long].
 84/// \returns
 85///    A 256-bit vector of [4 x long long].
 86///
 87/// \code{.operation}
 88/// DEFINE ROR64(qword, n) {
 89/// 	count := n % 64
 90/// 	dest := (qword >> count) | (qword << (64 - count))
 91/// 	RETURN dest
 92/// }
 93/// DEFINE SHR64(qword, n) {
 94/// 	RETURN qword >> n
 95/// }
 96/// DEFINE s1(qword) {
 97/// 	RETURN ROR64(qword,19) ^ ROR64(qword, 61) ^ SHR64(qword, 6)
 98/// }
 99/// W[14] := __B.qword[2]
100/// W[15] := __B.qword[3]
101/// W[16] := __A.qword[0] + s1(W[14])
102/// W[17] := __A.qword[1] + s1(W[15])
103/// W[18] := __A.qword[2] + s1(W[16])
104/// W[19] := __A.qword[3] + s1(W[17])
105/// dst.qword[3] := W[19]
106/// dst.qword[2] := W[18]
107/// dst.qword[1] := W[17]
108/// dst.qword[0] := W[16]
109/// dst[MAX:256] := 0
110/// \endcode
111static __inline__ __m256i __DEFAULT_FN_ATTRS256
112_mm256_sha512msg2_epi64(__m256i __A, __m256i __B) {
113  return (__m256i)__builtin_ia32_vsha512msg2((__v4du)__A, (__v4du)__B);
114}
115
/// This intrinsic performs two rounds of SHA512 operation using initial SHA512
///    state (C,D,G,H) from \a __A, an initial SHA512 state (A,B,E,F) from
///    \a __B, and a pre-computed sum of the next two round message qwords and
///    the corresponding round constants from \a __C (only the two lower qwords
///    of the third operand). The updated SHA512 state (A,B,E,F) is returned in
///    \a dst, and \a __B can be used as the updated state (C,D,G,H) in later
///    rounds.
123///
124/// \headerfile <immintrin.h>
125///
126/// \code
127/// __m256i _mm256_sha512rnds2_epi64(__m256i __A, __m256i __B, __m128i __C)
128/// \endcode
129///
130/// This intrinsic corresponds to the \c VSHA512RNDS2 instruction.
131///
132/// \param __A
133///    A 256-bit vector of [4 x long long].
134/// \param __B
135///    A 256-bit vector of [4 x long long].
136/// \param __C
137///    A 128-bit vector of [2 x long long].
138/// \returns
139///    A 256-bit vector of [4 x long long].
140///
141/// \code{.operation}
142/// DEFINE ROR64(qword, n) {
143/// 	count := n % 64
144/// 	dest := (qword >> count) | (qword << (64 - count))
145/// 	RETURN dest
146/// }
147/// DEFINE SHR64(qword, n) {
148/// 	RETURN qword >> n
149/// }
150/// DEFINE cap_sigma0(qword) {
151/// 	RETURN ROR64(qword,28) ^ ROR64(qword, 34) ^ ROR64(qword, 39)
152/// }
153/// DEFINE cap_sigma1(qword) {
154/// 	RETURN ROR64(qword,14) ^ ROR64(qword, 18) ^ ROR64(qword, 41)
155/// }
156/// DEFINE MAJ(a,b,c) {
157/// 	RETURN (a & b) ^ (a & c) ^ (b & c)
158/// }
159/// DEFINE CH(e,f,g) {
160/// 	RETURN (e & f) ^ (g & ~e)
161/// }
/// A[0] := __B.qword[3]
/// B[0] := __B.qword[2]
/// C[0] := __A.qword[3]
/// D[0] := __A.qword[2]
/// E[0] := __B.qword[1]
/// F[0] := __B.qword[0]
/// G[0] := __A.qword[1]
/// H[0] := __A.qword[0]
/// WK[0]:= __C.qword[0]
/// WK[1]:= __C.qword[1]
172/// FOR i := 0 to 1:
173/// 	A[i+1] := CH(E[i], F[i], G[i]) +
174/// 	cap_sigma1(E[i]) + WK[i] + H[i] +
175/// 	MAJ(A[i], B[i], C[i]) +
176/// 	cap_sigma0(A[i])
177/// 	B[i+1] := A[i]
178/// 	C[i+1] := B[i]
179/// 	D[i+1] := C[i]
180/// 	E[i+1] := CH(E[i], F[i], G[i]) +
181/// 	cap_sigma1(E[i]) + WK[i] + H[i] + D[i]
182/// 	F[i+1] := E[i]
183/// 	G[i+1] := F[i]
184/// 	H[i+1] := G[i]
185/// ENDFOR
186/// dst.qword[3] := A[2]
187/// dst.qword[2] := B[2]
188/// dst.qword[1] := E[2]
189/// dst.qword[0] := F[2]
190/// dst[MAX:256] := 0
191/// \endcode
192static __inline__ __m256i __DEFAULT_FN_ATTRS256
193_mm256_sha512rnds2_epi64(__m256i __A, __m256i __B, __m128i __C) {
194  return (__m256i)__builtin_ia32_vsha512rnds2((__v4du)__A, (__v4du)__B,
195                                              (__v2du)__C);
196}
197
198#undef __DEFAULT_FN_ATTRS256
199
200#endif // __SHA512INTRIN_H