ApproximateClock.h 3.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120
  1. #if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION)
  2. // Copyright 2023-present Facebook. All Rights Reserved.
  3. #pragma once
  4. #include <c10/macros/Export.h>
  5. #include <array>
  6. #include <chrono>
  7. #include <cstddef>
  8. #include <cstdint>
  9. #include <ctime>
  10. #include <functional>
  11. #include <type_traits>
  12. #if defined(C10_IOS) && defined(C10_MOBILE)
  13. #include <sys/time.h> // for gettimeofday()
  14. #endif
  15. #if defined(__i386__) || defined(__x86_64__) || defined(__amd64__)
  16. #define C10_RDTSC
  17. #if defined(_MSC_VER)
  18. #include <intrin.h>
  19. #elif defined(__CUDACC__) || defined(__HIPCC__)
  20. #undef C10_RDTSC
  21. #elif defined(__clang__)
  22. // `__rdtsc` is available by default.
  23. // NB: This has to be first, because Clang will also define `__GNUC__`
  24. #elif defined(__GNUC__)
  25. #include <x86intrin.h>
  26. #else
  27. #undef C10_RDTSC
  28. #endif
  29. #endif
  30. namespace c10 {
  // Integer type used for the nanosecond timestamps returned by `getTime` and
  // `getTimeSinceEpoch`.
  using time_t = std::int64_t;

  // Clock used by `getTime` on platforms that go through std::chrono:
  // `high_resolution_clock` when it reports itself as steady, otherwise
  // `steady_clock`.
  using steady_clock_t = std::conditional_t<
      !std::chrono::high_resolution_clock::is_steady,
      std::chrono::steady_clock,
      std::chrono::high_resolution_clock>;
  // Returns the current `std::chrono::system_clock` reading, expressed as a
  // count of nanoseconds since that clock's epoch.
  inline time_t getTimeSinceEpoch() {
    using std::chrono::nanoseconds;
    const auto since_epoch =
        std::chrono::system_clock::now().time_since_epoch();
    return std::chrono::duration_cast<nanoseconds>(since_epoch).count();
  }
  // Returns the current time as a count of nanoseconds.
  //
  // Which clock is read depends on the platform:
  //   * C10_IOS && C10_MOBILE: `gettimeofday` (microseconds scaled to ns);
  //     `allow_monotonic` is not consulted.
  //   * _WIN32 / __MACH__: `steady_clock_t`; `allow_monotonic` is not consulted.
  //   * otherwise: `clock_gettime`, using CLOCK_MONOTONIC when
  //     `allow_monotonic` is true and CLOCK_REALTIME when it is false.
  inline time_t getTime(bool allow_monotonic = false) {
  #if defined(C10_IOS) && defined(C10_MOBILE)
    // clock_gettime is only available on iOS 10.0 or newer. Unlike OS X, iOS
    // can't rely on CLOCK_REALTIME, as it is defined no matter if clock_gettime
    // is implemented or not
    struct timeval tv;
    gettimeofday(&tv, nullptr);
    const time_t seconds_as_ns = static_cast<time_t>(tv.tv_sec) * 1000000000;
    const time_t micros_as_ns = static_cast<time_t>(tv.tv_usec) * 1000;
    return seconds_as_ns + micros_as_ns;
  #elif defined(_WIN32) || defined(__MACH__)
    const auto since_epoch = steady_clock_t::now().time_since_epoch();
    return std::chrono::duration_cast<std::chrono::nanoseconds>(since_epoch)
        .count();
  #else
    // clock_gettime is *much* faster than std::chrono implementation on Linux
    const auto clock_id = allow_monotonic ? CLOCK_MONOTONIC : CLOCK_REALTIME;
    struct timespec ts{};
    clock_gettime(clock_id, &ts);
    const time_t seconds_as_ns = static_cast<time_t>(ts.tv_sec) * 1000000000;
    return seconds_as_ns + static_cast<time_t>(ts.tv_nsec);
  #endif
  }
  65. // We often do not need to capture true wall times. If a fast mechanism such
  66. // as TSC is available we can use that instead and convert back to epoch time
  67. // during post processing. This greatly reduces the clock's contribution to
  68. // profiling.
  69. // http://btorpey.github.io/blog/2014/02/18/clock-sources-in-linux/
  70. // https://quick-bench.com/q/r8opkkGZSJMu9wM_XTbDouq-0Io
  71. // TODO: We should use
  72. // `https://github.com/google/benchmark/blob/main/src/cycleclock.h`
  73. inline auto getApproximateTime() {
  74. #if defined(C10_RDTSC)
  75. return static_cast<uint64_t>(__rdtsc());
  76. #else
  77. return getTime();
  78. #endif
  79. }
  80. using approx_time_t = decltype(getApproximateTime());
  81. static_assert(
  82. std::is_same_v<approx_time_t, int64_t> ||
  83. std::is_same_v<approx_time_t, uint64_t>,
  84. "Expected either int64_t (`getTime`) or uint64_t (some TSC reads).");
  85. // Convert `getApproximateTime` results to nanoseconds since the Unix epoch.
  86. class C10_API ApproximateClockToUnixTimeConverter final {
  87. public:
  88. ApproximateClockToUnixTimeConverter();
  89. std::function<time_t(approx_time_t)> makeConverter();
  90. struct UnixAndApproximateTimePair {
  91. time_t t_;
  92. approx_time_t approx_t_;
  93. };
  94. static UnixAndApproximateTimePair measurePair();
  95. private:
  96. static constexpr size_t replicates = 1001;
  97. using time_pairs = std::array<UnixAndApproximateTimePair, replicates>;
  98. time_pairs measurePairs();
  99. time_pairs start_times_;
  100. };
  101. } // namespace c10
  102. #else
  103. #error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined."
  104. #endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION)