Nugget
fill_help.h
1 // Copyright (c) Electronic Arts Inc. All rights reserved.
4 
5 
6 #ifndef EASTL_INTERNAL_FILL_HELP_H
7 #define EASTL_INTERNAL_FILL_HELP_H
8 
9 
10 #if defined(EA_PRAGMA_ONCE_SUPPORTED)
11  #pragma once
12 #endif
13 
14 #include <EASTL/internal/config.h>
15 
16 #if defined(EA_COMPILER_MICROSOFT) && (defined(EA_PROCESSOR_X86) || defined(EA_PROCESSOR_X86_64))
17 #include <intrin.h>
18 #endif
19 
20 namespace eastl
21 {
22  // fill
23  //
24  // We implement some fill helper functions in order to allow us to optimize it
25  // where possible.
26  //
// fill_imp
//
// Primary template: the general-purpose fill loop. An explicit specialization
// for bIsScalar == true is declared separately; every other instantiation
// uses this version.
template <bool bIsScalar>
struct fill_imp
{
    // Assigns 'value' to each element of [first, last), in order.
    template <typename ForwardIterator, typename T>
    static void do_fill(ForwardIterator first, ForwardIterator last, const T& value)
    {
        // 'value' is assigned straight through the reference; no temporary copy
        // is made. The C++ standard doesn't specify whether one is needed, and
        // std STL implementations are written this way as well.
        while(first != last)
        {
            *first = value;
            ++first;
        }
    }
};
39 
40  template <>
41  struct fill_imp<true>
42  {
43  template <typename ForwardIterator, typename T>
44  static void do_fill(ForwardIterator first, ForwardIterator last, const T& value)
45  {
46  typedef typename eastl::iterator_traits<ForwardIterator>::value_type value_type;
47  // We create a temp and fill from that because value might alias to the
48  // destination range and so the compiler would be forced into generating
49  // less efficient code.
50  for(const T temp = value; first != last; ++first)
51  {
52  EA_UNUSED(temp);
53  *first = static_cast<value_type>(temp);
54  }
55  }
56  };
57 
74  template <typename ForwardIterator, typename T>
75  inline void fill(ForwardIterator first, ForwardIterator last, const T& value)
76  {
77  eastl::fill_imp< is_scalar<T>::value >::do_fill(first, last, value);
78 
79  // Possibly better implementation, as it will deal with small PODs as well as scalars:
80  // bEasyCopy is true if the type has a trivial constructor (e.g. is a POD) and if
81  // it is small. Thus any built-in type or any small user-defined struct will qualify.
82  //const bool bEasyCopy = eastl::type_and<eastl::has_trivial_constructor<T>::value,
83  // eastl::integral_constant<bool, (sizeof(T) <= 16)>::value;
84  //eastl::fill_imp<bEasyCopy>::do_fill(first, last, value);
85 
86  }
87 
88  #if (defined(EA_COMPILER_GNUC) || defined(__clang__)) && (defined(EA_PROCESSOR_X86) || defined(EA_PROCESSOR_X86_64))
// fill overloads for 16/32/64-bit integer ranges (GCC/clang, x86 and x86-64).
//
// Each overload converts 'c' to the element type and stores it into every
// element of [first, last) with a single 'rep stos' instruction. The operand
// constraints bind the element count to the c register ("+c"), the
// destination pointer to the di register ("+D") and the value to the
// a register ("a").
//
// The instruction writes through the destination pointer, which the operand
// list cannot describe, so the "memory" clobber is required: without it the
// compiler is free to assume the destination elements are unchanged by the
// asm statement. 'first' must be listed only once as an output ("+D").
#if defined(EA_PROCESSOR_X86_64)
template <typename Value>
inline void fill(uint64_t* first, uint64_t* last, Value c)
{
    uintptr_t count = (uintptr_t)(last - first);
    uint64_t  value = (uint64_t)(c);

    __asm__ __volatile__ ("cld\n\t"
                          "rep stosq\n\t"
                          : "+c" (count), "+D" (first)
                          : "a" (value)
                          : "cc", "memory");
}


template <typename Value>
inline void fill(int64_t* first, int64_t* last, Value c)
{
    uintptr_t count = (uintptr_t)(last - first);
    int64_t   value = (int64_t)(c);

    __asm__ __volatile__ ("cld\n\t"
                          "rep stosq\n\t"
                          : "+c" (count), "+D" (first)
                          : "a" (value)
                          : "cc", "memory");
}
#endif

template <typename Value>
inline void fill(uint32_t* first, uint32_t* last, Value c)
{
    uintptr_t count = (uintptr_t)(last - first);
    uint32_t  value = (uint32_t)(c);

    __asm__ __volatile__ ("cld\n\t"
                          "rep stosl\n\t"
                          : "+c" (count), "+D" (first)
                          : "a" (value)
                          : "cc", "memory");
}


template <typename Value>
inline void fill(int32_t* first, int32_t* last, Value c)
{
    uintptr_t count = (uintptr_t)(last - first);
    int32_t   value = (int32_t)(c);

    __asm__ __volatile__ ("cld\n\t"
                          "rep stosl\n\t"
                          : "+c" (count), "+D" (first)
                          : "a" (value)
                          : "cc", "memory");
}


template <typename Value>
inline void fill(uint16_t* first, uint16_t* last, Value c)
{
    uintptr_t count = (uintptr_t)(last - first);
    uint16_t  value = (uint16_t)(c);

    __asm__ __volatile__ ("cld\n\t"
                          "rep stosw\n\t"
                          : "+c" (count), "+D" (first)
                          : "a" (value)
                          : "cc", "memory");
}


template <typename Value>
inline void fill(int16_t* first, int16_t* last, Value c)
{
    uintptr_t count = (uintptr_t)(last - first);
    int16_t   value = (int16_t)(c);

    __asm__ __volatile__ ("cld\n\t"
                          "rep stosw\n\t"
                          : "+c" (count), "+D" (first)
                          : "a" (value)
                          : "cc", "memory");
}
172 
173  #elif defined(EA_COMPILER_MICROSOFT) && (defined(EA_PROCESSOR_X86) || defined(EA_PROCESSOR_X86_64))
174  #if defined(EA_PROCESSOR_X86_64)
175  template <typename Value>
176  inline void fill(uint64_t* first, uint64_t* last, Value c)
177  {
178  __stosq(first, (uint64_t)c, (size_t)(last - first));
179  }
180 
181  template <typename Value>
182  inline void fill(int64_t* first, int64_t* last, Value c)
183  {
184  __stosq((uint64_t*)first, (uint64_t)c, (size_t)(last - first));
185  }
186  #endif
187 
188  template <typename Value>
189  inline void fill(uint32_t* first, uint32_t* last, Value c)
190  {
191  __stosd((unsigned long*)first, (unsigned long)c, (size_t)(last - first));
192  }
193 
194  template <typename Value>
195  inline void fill(int32_t* first, int32_t* last, Value c)
196  {
197  __stosd((unsigned long*)first, (unsigned long)c, (size_t)(last - first));
198  }
199 
200  template <typename Value>
201  inline void fill(uint16_t* first, uint16_t* last, Value c)
202  {
203  __stosw(first, (uint16_t)c, (size_t)(last - first));
204  }
205 
206  template <typename Value>
207  inline void fill(int16_t* first, int16_t* last, Value c)
208  {
209  __stosw((uint16_t*)first, (uint16_t)c, (size_t)(last - first));
210  }
211  #endif
212 
213 
// Sets every char in [first, last) to 'c' with a single memset.
// It's debatable whether we should use 'char& c' or 'char c' here.
inline void fill(char* first, char* last, const char& c)
{
    const size_t        byteCount = (size_t)(last - first);
    const unsigned char fillByte  = (unsigned char)c;

    __builtin_memset(first, fillByte, byteCount);
}
218 
// Sets every char in [first, last) to the low byte of 'c' with a single memset.
// This is used for cases like 'fill(first, last, 0)'.
inline void fill(char* first, char* last, const int c)
{
    const size_t        byteCount = (size_t)(last - first);
    const unsigned char fillByte  = (unsigned char)c;

    __builtin_memset(first, fillByte, byteCount);
}
223 
// Sets every unsigned char in [first, last) to 'c' with a single memset.
inline void fill(unsigned char* first, unsigned char* last, const unsigned char& c)
{
    const size_t        byteCount = (size_t)(last - first);
    const unsigned char fillByte  = (unsigned char)c;

    __builtin_memset(first, fillByte, byteCount);
}
228 
// Sets every unsigned char in [first, last) to the low byte of 'c' with a
// single memset (the integer-literal form, e.g. 'fill(first, last, 0)').
inline void fill(unsigned char* first, unsigned char* last, const int c)
{
    const size_t        byteCount = (size_t)(last - first);
    const unsigned char fillByte  = (unsigned char)c;

    __builtin_memset(first, fillByte, byteCount);
}
233 
// Sets every signed char in [first, last) to 'c' with a single memset.
inline void fill(signed char* first, signed char* last, const signed char& c)
{
    const size_t        byteCount = (size_t)(last - first);
    const unsigned char fillByte  = (unsigned char)c;

    __builtin_memset(first, fillByte, byteCount);
}
238 
// Sets every signed char in [first, last) to the low byte of 'c' with a
// single memset (the integer-literal form, e.g. 'fill(first, last, 0)').
inline void fill(signed char* first, signed char* last, const int c)
{
    const size_t        byteCount = (size_t)(last - first);
    const unsigned char fillByte  = (unsigned char)c;

    __builtin_memset(first, fillByte, byteCount);
}
243 
244  #if defined(_MSC_VER) || defined(__BORLANDC__) || defined(__ICL) // ICL = Intel compiler
// Sets every bool in [first, last) to 'b' by writing (char)b into each byte of
// the range with a single memset.
// NOTE(review): __builtin_memset is a GCC/clang builtin; confirm it is
// available on every compiler selected by the enclosing #if.
inline void fill(bool* first, bool* last, const bool& b)
{
    const size_t byteCount = (size_t)(last - first);
    const char   fillByte  = (char)b;

    __builtin_memset(first, fillByte, byteCount);
}
249  #endif
250 
251 
252 
253 
254  // fill_n
255  //
256  // We implement some fill helper functions in order to allow us to optimize it
257  // where possible.
258  //
// fill_n_imp
//
// Primary template: the general-purpose counted fill loop. An explicit
// specialization for bIsScalar == true is declared separately; every other
// instantiation uses this version.
template <bool bIsScalar>
struct fill_n_imp
{
    // Assigns 'value' to n consecutive elements starting at 'first' and returns
    // the iterator one past the last element written. Nothing is written when
    // n is not greater than zero.
    template <typename OutputIterator, typename Size, typename T>
    static OutputIterator do_fill(OutputIterator first, Size n, const T& value)
    {
        while(n-- > 0)
        {
            *first = value;
            ++first;
        }

        return first;
    }
};
270 
271  template <>
272  struct fill_n_imp<true>
273  {
274  template <typename OutputIterator, typename Size, typename T>
275  static OutputIterator do_fill(OutputIterator first, Size n, const T& value)
276  {
277  typedef typename eastl::iterator_traits<OutputIterator>::value_type value_type;
278 
279  // We create a temp and fill from that because value might alias to
280  // the destination range and so the compiler would be forced into
281  // generating less efficient code.
282  for(const T temp = value; n-- > 0; ++first)
283  *first = static_cast<value_type>(temp);
284  return first;
285  }
286  };
287 
298  template <typename OutputIterator, typename Size, typename T>
299  OutputIterator fill_n(OutputIterator first, Size n, const T& value)
300  {
301  return eastl::fill_n_imp<is_scalar<T>::value>::do_fill(first, n, value);
302  }
303 
// Sets n chars starting at 'first' to 'c' with a single memset and returns the
// pointer one past the last char written.
//
// A count that is not greater than zero writes nothing and returns 'first',
// matching the generic fill_n loop. Without this check a negative n would be
// converted to a huge size_t length.
template <typename Size>
inline char* fill_n(char* first, Size n, const char& c)
{
    if(n > 0)
    {
        __builtin_memset(first, (char)c, (size_t)n);
        first += n;
    }

    return first;
}
309 
// Sets n unsigned chars starting at 'first' to 'c' with a single memset and
// returns the pointer one past the last element written.
//
// A count that is not greater than zero writes nothing and returns 'first',
// matching the generic fill_n loop. Without this check a negative n would be
// converted to a huge size_t length.
template <typename Size>
inline unsigned char* fill_n(unsigned char* first, Size n, const unsigned char& c)
{
    if(n > 0)
    {
        __builtin_memset(first, (unsigned char)c, (size_t)n);
        first += n;
    }

    return first;
}
315 
// Sets n signed chars starting at 'first' to 'c' with a single memset and
// returns the pointer one past the last element written.
//
// A count that is not greater than zero writes nothing and returns 'first',
// matching the generic fill_n loop. Without this check a negative n would be
// converted to a huge size_t length.
template <typename Size>
inline signed char* fill_n(signed char* first, Size n, const signed char& c)
{
    if(n > 0)
    {
        __builtin_memset(first, (signed char)c, (size_t)n);
        first += n;
    }

    return first;
}
321 
322  #if defined(_MSC_VER) || defined(__BORLANDC__) || defined(__ICL) // ICL = Intel compiler
// Sets n bools starting at 'first' to 'b' by writing (char)b into each byte
// with a single memset, and returns the pointer one past the last element
// written.
//
// A count that is not greater than zero writes nothing and returns 'first',
// matching the generic fill_n loop. Without this check a negative n would be
// converted to a huge size_t length.
// NOTE(review): __builtin_memset is a GCC/clang builtin; confirm it is
// available on every compiler selected by the enclosing #if.
template <typename Size>
inline bool* fill_n(bool* first, Size n, const bool& b)
{
    if(n > 0)
    {
        __builtin_memset(first, (char)b, (size_t)n);
        first += n;
    }

    return first;
}
328  #endif
329 
330  #if (defined(EA_COMPILER_GNUC) || defined(__clang__)) && (defined(EA_PROCESSOR_X86) || defined(EA_PROCESSOR_X86_64))
// fill_n overloads for 16/32/64-bit integer destinations (GCC/clang, x86 and
// x86-64).
//
// Each overload converts 'c' to the element type, stores it into n consecutive
// elements starting at 'first' with a single 'rep stos' instruction, and
// returns the pointer one past the last element written. The operand
// constraints bind the element count to the c register ("+c"), the
// destination pointer to the di register ("+D") and the value to the
// a register ("a"); the instruction leaves the advanced destination pointer in
// 'first', which is what gets returned.
//
// The instruction writes through the destination pointer, which the operand
// list cannot describe, so the "memory" clobber is required: without it the
// compiler is free to assume the destination elements are unchanged by the
// asm statement. 'first' must be listed only once as an output ("+D") so that
// the returned value is unambiguous.
//
// A count that is not greater than zero writes nothing and returns 'first',
// matching the generic fill_n loop. Without this check a negative n would be
// converted to a huge unsigned repeat count.
#if defined(EA_PROCESSOR_X86_64)
template <typename Size, typename Value>
inline uint64_t* fill_n(uint64_t* first, Size n, Value c)
{
    if(n > 0)
    {
        uintptr_t count = (uintptr_t)(n);
        uint64_t  value = (uint64_t)(c);

        __asm__ __volatile__ ("cld\n\t"
                              "rep stosq\n\t"
                              : "+c" (count), "+D" (first)
                              : "a" (value)
                              : "cc", "memory");
    }
    return first; // first is updated by the code above.
}


template <typename Size, typename Value>
inline int64_t* fill_n(int64_t* first, Size n, Value c)
{
    if(n > 0)
    {
        uintptr_t count = (uintptr_t)(n);
        int64_t   value = (int64_t)(c);

        __asm__ __volatile__ ("cld\n\t"
                              "rep stosq\n\t"
                              : "+c" (count), "+D" (first)
                              : "a" (value)
                              : "cc", "memory");
    }
    return first; // first is updated by the code above.
}
#endif

template <typename Size, typename Value>
inline uint32_t* fill_n(uint32_t* first, Size n, Value c)
{
    if(n > 0)
    {
        uintptr_t count = (uintptr_t)(n);
        uint32_t  value = (uint32_t)(c);

        __asm__ __volatile__ ("cld\n\t"
                              "rep stosl\n\t"
                              : "+c" (count), "+D" (first)
                              : "a" (value)
                              : "cc", "memory");
    }
    return first; // first is updated by the code above.
}


template <typename Size, typename Value>
inline int32_t* fill_n(int32_t* first, Size n, Value c)
{
    if(n > 0)
    {
        uintptr_t count = (uintptr_t)(n);
        int32_t   value = (int32_t)(c);

        __asm__ __volatile__ ("cld\n\t"
                              "rep stosl\n\t"
                              : "+c" (count), "+D" (first)
                              : "a" (value)
                              : "cc", "memory");
    }
    return first; // first is updated by the code above.
}


template <typename Size, typename Value>
inline uint16_t* fill_n(uint16_t* first, Size n, Value c)
{
    if(n > 0)
    {
        uintptr_t count = (uintptr_t)(n);
        uint16_t  value = (uint16_t)(c);

        __asm__ __volatile__ ("cld\n\t"
                              "rep stosw\n\t"
                              : "+c" (count), "+D" (first)
                              : "a" (value)
                              : "cc", "memory");
    }
    return first; // first is updated by the code above.
}


template <typename Size, typename Value>
inline int16_t* fill_n(int16_t* first, Size n, Value c)
{
    if(n > 0)
    {
        uintptr_t count = (uintptr_t)(n);
        int16_t   value = (int16_t)(c);

        __asm__ __volatile__ ("cld\n\t"
                              "rep stosw\n\t"
                              : "+c" (count), "+D" (first)
                              : "a" (value)
                              : "cc", "memory");
    }
    return first; // first is updated by the code above.
}
420 
421  #elif defined(EA_COMPILER_MICROSOFT) && (defined(EA_PROCESSOR_X86) || defined(EA_PROCESSOR_X86_64))
422  #if defined(EA_PROCESSOR_X86_64)
423  template <typename Size, typename Value>
424  inline uint64_t* fill_n(uint64_t* first, Size n, Value c)
425  {
426  __stosq(first, (uint64_t)c, (size_t)n);
427  return first + n;
428  }
429 
430  template <typename Size, typename Value>
431  inline int64_t* fill_n(int64_t* first, Size n, Value c)
432  {
433  __stosq((uint64_t*)first, (uint64_t)c, (size_t)n);
434  return first + n;
435  }
436  #endif
437 
438  template <typename Size, typename Value>
439  inline uint32_t* fill_n(uint32_t* first, Size n, Value c)
440  {
441  __stosd((unsigned long*)first, (unsigned long)c, (size_t)n);
442  return first + n;
443  }
444 
445  template <typename Size, typename Value>
446  inline int32_t* fill_n(int32_t* first, Size n, Value c)
447  {
448  __stosd((unsigned long*)first, (unsigned long)c, (size_t)n);
449  return first + n;
450  }
451 
452  template <typename Size, typename Value>
453  inline uint16_t* fill_n(uint16_t* first, Size n, Value c)
454  {
455  __stosw(first, (uint16_t)c, (size_t)n);
456  return first + n;
457  }
458 
459  template <typename Size, typename Value>
460  inline int16_t* fill_n(int16_t* first, Size n, Value c)
461  {
462  __stosw((uint16_t*)first, (uint16_t)c, (size_t)n);
463  return first + n;
464  }
465  #endif
466 
467 } // namespace eastl
468 
469 #endif // Header include guard
470 
471 
472 
473 
474 
475 
476 
477 
478 
479 
480 
481 
482 
483 
484 
EA Standard Template Library.
Definition: algorithm.h:288
eastl::iterator_traits< InputIterator >::difference_type count(InputIterator first, InputIterator last, const T &value)
Definition: algorithm.h:1347
OutputIterator fill_n(OutputIterator first, Size n, const T &value)
Definition: fill_help.h:299
void fill(ForwardIterator first, ForwardIterator last, const T &value)
Definition: fill_help.h:75
Definition: fill_help.h:29
Definition: fill_help.h:261