/*
 * Copyright © 2017 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 */

#include "../i915_selftest.h"
#include "i915_random.h"

#include "mock_gem_device.h"
#include "mock_timeline.h"

struct __igt_sync {
	const char *name;
	u32 seqno;
	bool expected;
	bool set;
};

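/* Check a single step of the igt_sync script below: verify that
 * __intel_timeline_sync_is_later() reports the expected result for
 * (ctx, seqno), then optionally record the seqno for later steps.
 */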
static int __igt_sync(struct intel_timeline *tl,
		      u64 ctx,
		      const struct __igt_sync *p,
		      const char *name)
{
	int ret;

	if (__intel_timeline_sync_is_later(tl, ctx, p->seqno) != p->expected) {
		pr_err("%s: %s(ctx=%llu, seqno=%u) expected is_later=%s, got %s\n",
		       name, p->name, ctx, p->seqno,
		       yesno(p->expected), yesno(!p->expected));
		return -EINVAL;
	}

	if (p->set) {
		ret = __intel_timeline_sync_set(tl, ctx, p->seqno);
		if (ret)
			return ret;
	}

	return 0;
}

static int igt_sync(void *arg)
{
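	/* A scripted walk through seqno space for a single context: each
	 * step queries is_later() against the expected answer and then
	 * optionally records the seqno. The INT_MAX/UINT_MAX steps exercise
	 * u32 wraparound, where "later" is decided by signed seqno distance.
	 */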
	const struct __igt_sync pass[] = {
		{ "unset", 0, false, false },
		{ "new", 0, false, true },
		{ "0a", 0, true, true },
		{ "1a", 1, false, true },
		{ "1b", 1, true, true },
		{ "0b", 0, true, false },
		{ "2a", 2, false, true },
		{ "4", 4, false, true },
		{ "INT_MAX", INT_MAX, false, true },
		{ "INT_MAX-1", INT_MAX-1, true, false },
		{ "INT_MAX+1", (u32)INT_MAX+1, false, true },
		{ "INT_MAX", INT_MAX, true, false },
		{ "UINT_MAX", UINT_MAX, false, true },
		{ "wrap", 0, false, true },
		{ "unwrap", UINT_MAX, true, false },
		{},
	}, *p;
	struct intel_timeline *tl;
	int order, offset;
	int ret;

	tl = mock_timeline(0);
	if (!tl)
		return -ENOMEM;

	for (p = pass; p->name; p++) {
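		/* Walk context ids just below, at and just above each power
		 * of two so that every bit of the u64 id space is exercised.
		 */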
		for (order = 1; order < 64; order++) {
			for (offset = -1; offset <= (order > 1); offset++) {
				u64 ctx = BIT_ULL(order) + offset;

				ret = __igt_sync(tl, ctx, p, "1");
				if (ret)
					goto out;
			}
		}
	}
	mock_timeline_destroy(tl);

	tl = mock_timeline(0);
	if (!tl)
		return -ENOMEM;

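	/* Now invert the nesting: run the full pass sequence to completion
	 * on each context id before moving on to the next.
	 */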
	for (order = 1; order < 64; order++) {
		for (offset = -1; offset <= (order > 1); offset++) {
			u64 ctx = BIT_ULL(order) + offset;

			for (p = pass; p->name; p++) {
				ret = __igt_sync(tl, ctx, p, "2");
				if (ret)
					goto out;
			}
		}
	}

out:
	mock_timeline_destroy(tl);
	return ret;
}

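/* Pick a uniformly distributed engine index in [0, I915_NUM_ENGINES) by
 * scaling a 32bit random value, avoiding the bias of a simple modulus.
 */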
static unsigned int random_engine(struct rnd_state *rnd)
{
	return ((u64)prandom_u32_state(rnd) * I915_NUM_ENGINES) >> 32;
}

static int bench_sync(void *arg)
{
	struct rnd_state prng;
	struct intel_timeline *tl;
	unsigned long end_time, count;
	u64 prng32_1M;
	ktime_t kt;
	int order, last_order;

	tl = mock_timeline(0);
	if (!tl)
		return -ENOMEM;

	/* Lookups from cache are very fast and so the random number generation
	 * and the loop itself become significant factors in the per-iteration
	 * timings. We try to compensate by measuring the overhead of the prng
	 * and subtracting it from the reported results.
	 */
	prandom_seed_state(&prng, i915_selftest.random_seed);
	count = 0;
	kt = ktime_get();
	end_time = jiffies + HZ/10;
	do {
		u32 x;

		/* Make sure the compiler doesn't optimise away the prng call */
		WRITE_ONCE(x, prandom_u32_state(&prng));

		count++;
	} while (!time_after(jiffies, end_time));
	kt = ktime_sub(ktime_get(), kt);
	pr_debug("%s: %lu random evaluations, %lluns/prng\n",
		 __func__, count, (long long)div64_ul(ktime_to_ns(kt), count));
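	/* Store the cost of 2^20 (1M) prng calls in ns; keeping the value in
	 * fixed point lets us later deduct N calls as (N * prng32_1M) >> 20.
	 */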
	prng32_1M = div64_ul(ktime_to_ns(kt) << 20, count);

	/* Benchmark (only) setting random context ids */
	prandom_seed_state(&prng, i915_selftest.random_seed);
	count = 0;
	kt = ktime_get();
	end_time = jiffies + HZ/10;
	do {
		u64 id = i915_prandom_u64_state(&prng);

		__intel_timeline_sync_set(tl, id, 0);
		count++;
	} while (!time_after(jiffies, end_time));
	kt = ktime_sub(ktime_get(), kt);
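	/* Each i915_prandom_u64_state() consumes two u32 prng draws; deduct
	 * that measured overhead from the reported timings.
	 */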
	kt = ktime_sub_ns(kt, (count * prng32_1M * 2) >> 20);
	pr_info("%s: %lu random insertions, %lluns/insert\n",
		__func__, count, (long long)div64_ul(ktime_to_ns(kt), count));

	/* Benchmark looking up the exact same context ids as we just set */
	prandom_seed_state(&prng, i915_selftest.random_seed);
	end_time = count;
	kt = ktime_get();
	while (end_time--) {
		u64 id = i915_prandom_u64_state(&prng);

		if (!__intel_timeline_sync_is_later(tl, id, 0)) {
			mock_timeline_destroy(tl);
			pr_err("Lookup of %llu failed\n", id);
			return -EINVAL;
		}
	}
	kt = ktime_sub(ktime_get(), kt);
	kt = ktime_sub_ns(kt, (count * prng32_1M * 2) >> 20);
	pr_info("%s: %lu random lookups, %lluns/lookup\n",
		__func__, count, (long long)div64_ul(ktime_to_ns(kt), count));

	mock_timeline_destroy(tl);
	cond_resched();

	tl = mock_timeline(0);
	if (!tl)
		return -ENOMEM;

	/* Benchmark setting the first N (in order) contexts */
	count = 0;
	kt = ktime_get();
	end_time = jiffies + HZ/10;
	do {
		__intel_timeline_sync_set(tl, count++, 0);
	} while (!time_after(jiffies, end_time));
	kt = ktime_sub(ktime_get(), kt);
	pr_info("%s: %lu in-order insertions, %lluns/insert\n",
		__func__, count, (long long)div64_ul(ktime_to_ns(kt), count));

	/* Benchmark looking up the exact same context ids as we just set */
	end_time = count;
	kt = ktime_get();
	while (end_time--) {
		if (!__intel_timeline_sync_is_later(tl, end_time, 0)) {
			pr_err("Lookup of %lu failed\n", end_time);
			mock_timeline_destroy(tl);
			return -EINVAL;
		}
	}
	kt = ktime_sub(ktime_get(), kt);
	pr_info("%s: %lu in-order lookups, %lluns/lookup\n",
		__func__, count, (long long)div64_ul(ktime_to_ns(kt), count));

	mock_timeline_destroy(tl);
	cond_resched();

	tl = mock_timeline(0);
	if (!tl)
		return -ENOMEM;

	/* Benchmark searching for a random context id and maybe changing it */
	prandom_seed_state(&prng, i915_selftest.random_seed);
	count = 0;
	kt = ktime_get();
	end_time = jiffies + HZ/10;
	do {
		u32 id = random_engine(&prng);
		u32 seqno = prandom_u32_state(&prng);

		if (!__intel_timeline_sync_is_later(tl, id, seqno))
			__intel_timeline_sync_set(tl, id, seqno);

		count++;
	} while (!time_after(jiffies, end_time));
	kt = ktime_sub(ktime_get(), kt);
	kt = ktime_sub_ns(kt, (count * prng32_1M * 2) >> 20);
	pr_info("%s: %lu repeated insert/lookups, %lluns/op\n",
		__func__, count, (long long)div64_ul(ktime_to_ns(kt), count));
	mock_timeline_destroy(tl);
	cond_resched();

	/* Benchmark searching for a known context id and changing the seqno */
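	/* Step the order through the Fibonacci sequence (1, 2, 3, 5, 8, ...)
	 * to sample a spread of working-set sizes without testing every one.
	 */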
	for (last_order = 1, order = 1; order < 32;
	     ({ int tmp = last_order; last_order = order; order += tmp; })) {
		unsigned int mask = BIT(order) - 1;

		tl = mock_timeline(0);
		if (!tl)
			return -ENOMEM;

		count = 0;
		kt = ktime_get();
		end_time = jiffies + HZ/10;
		do {
			/* Without assuming too many details of the underlying
			 * implementation, try to identify its phase-changes
			 * (if any): cycle through 2^order ids, each aligned
			 * to a 2^order boundary.
			 */
			u64 id = (u64)(count & mask) << order;

			__intel_timeline_sync_is_later(tl, id, 0);
			__intel_timeline_sync_set(tl, id, 0);

			count++;
		} while (!time_after(jiffies, end_time));
		kt = ktime_sub(ktime_get(), kt);
		pr_info("%s: %lu cyclic/%d insert/lookups, %lluns/op\n",
			__func__, count, order,
			(long long)div64_ul(ktime_to_ns(kt), count));
		mock_timeline_destroy(tl);
		cond_resched();
	}

	return 0;
}

int i915_gem_timeline_mock_selftests(void)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(igt_sync),
		SUBTEST(bench_sync),
	};

	return i915_subtests(tests, NULL);
}