/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <assert.h>
#include <stdbool.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/sysinfo.h>
#include <unistd.h>
#include <fcntl.h>
#include <xf86drm.h>
#include <drm_fourcc.h>

#include "anv_private.h"
#include "util/strtod.h"
#include "util/debug.h"
#include "util/build_id.h"
#include "util/mesa-sha1.h"
#include "vk_util.h"

#include "genxml/gen7_pack.h"

static void
compiler_debug_log(void *data, const char *fmt, ...)
{ }

static void
compiler_perf_log(void *data, const char *fmt, ...)
{
   va_list args;
   va_start(args, fmt);

   if (unlikely(INTEL_DEBUG & DEBUG_PERF))
      intel_logd_v(fmt, args);

   va_end(args);
}

static VkResult
anv_compute_heap_size(int fd, uint64_t *heap_size)
{
   uint64_t gtt_size;
   if (anv_gem_get_context_param(fd, 0, I915_CONTEXT_PARAM_GTT_SIZE,
                                 &gtt_size) == -1) {
      /* If, for whatever reason, we can't actually get the GTT size from the
       * kernel (too old?), fall back to the aperture size.
       */
      anv_perf_warn(NULL, NULL,
                    "Failed to get I915_CONTEXT_PARAM_GTT_SIZE: %m");

      if (anv_gem_get_aperture(fd, &gtt_size) == -1) {
         return vk_errorf(NULL, NULL, VK_ERROR_INITIALIZATION_FAILED,
                          "failed to get aperture size: %m");
      }
   }

   /* Query the total ram from the system */
   struct sysinfo info;
   sysinfo(&info);

   uint64_t total_ram = (uint64_t)info.totalram * (uint64_t)info.mem_unit;

   /* We don't want to burn too much ram with the GPU.  If the user has 4GiB
    * or less, we use at most half.  If they have more than 4GiB, we use 3/4.
    */
   uint64_t available_ram;
   if (total_ram <= 4ull * 1024ull * 1024ull * 1024ull)
      available_ram = total_ram / 2;
   else
      available_ram = total_ram * 3 / 4;

   /* We also want to leave some padding for things we allocate in the driver,
    * so don't go over 3/4 of the GTT either.
    */
   uint64_t available_gtt = gtt_size * 3 / 4;
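
   /* Worked example (hypothetical numbers, not from any particular part):
    * with 16 GiB of system RAM and an 8 GiB GTT, available_ram = 12 GiB and
    * available_gtt = 6 GiB, so the heap size computed below is 6 GiB.
    */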

   *heap_size = MIN2(available_ram, available_gtt);

   return VK_SUCCESS;
}

static VkResult
anv_physical_device_init_heaps(struct anv_physical_device *device, int fd)
{
   /* The kernel query only tells us whether or not the kernel supports the
    * EXEC_OBJECT_SUPPORTS_48B_ADDRESS flag and not whether or not the
    * hardware has actual 48bit address support.
    */
   device->supports_48bit_addresses =
      (device->info.gen >= 8) && anv_gem_supports_48b_addresses(fd);

   uint64_t heap_size;
   VkResult result = anv_compute_heap_size(fd, &heap_size);
   if (result != VK_SUCCESS)
      return result;

   if (heap_size > (2ull << 30) && !device->supports_48bit_addresses) {
      /* When running with an overridden PCI ID, we may get a GTT size from
       * the kernel that is greater than 2 GiB but the execbuf check for 48bit
       * address support can still fail.  Just clamp the address space size to
       * 2 GiB if we don't have 48-bit support.
       */
      intel_logw("%s:%d: The kernel reported a GTT size larger than 2 GiB but "
                 "no support for 48-bit addresses",
                 __FILE__, __LINE__);
      heap_size = 2ull << 30;
   }

   if (heap_size <= 3ull * (1ull << 30)) {
      /* In this case, everything fits nicely into the 32-bit address space,
       * so there's no need for supporting 48bit addresses on client-allocated
       * memory objects.
       */
      device->memory.heap_count = 1;
      device->memory.heaps[0] = (struct anv_memory_heap) {
         .size = heap_size,
         .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
         .supports_48bit_addresses = false,
      };
   } else {
      /* Not everything will fit nicely into a 32-bit address space.  In this
       * case we need a 64-bit heap.  Advertise a small 32-bit heap and a
       * larger 48-bit heap.  If we're in this case, then we have a total heap
       * size larger than 3GiB which most likely means they have 8 GiB of
       * video memory and so carving off 1 GiB for the 32-bit heap should be
       * reasonable.
       */
      const uint64_t heap_size_32bit = 1ull << 30;
      const uint64_t heap_size_48bit = heap_size - heap_size_32bit;

      assert(device->supports_48bit_addresses);

      device->memory.heap_count = 2;
      device->memory.heaps[0] = (struct anv_memory_heap) {
         .size = heap_size_48bit,
         .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
         .supports_48bit_addresses = true,
      };
      device->memory.heaps[1] = (struct anv_memory_heap) {
         .size = heap_size_32bit,
         .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
         .supports_48bit_addresses = false,
      };
   }
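
   /* For example (hypothetical numbers): a computed heap size of 6 GiB is
    * advertised as a 1 GiB 32-bit heap plus a 5 GiB 48-bit heap.
    */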

   uint32_t type_count = 0;
   for (uint32_t heap = 0; heap < device->memory.heap_count; heap++) {
      uint32_t valid_buffer_usage = ~0;

      /* There appears to be a hardware issue in the VF cache where it only
       * considers the bottom 32 bits of memory addresses.  If you happen to
       * have two vertex buffers which get placed exactly 4 GiB apart and use
       * them in back-to-back draw calls, you can get collisions.  In order to
       * solve this problem, we require vertex and index buffers be bound to
       * memory allocated out of the 32-bit heap.
       */
      if (device->memory.heaps[heap].supports_48bit_addresses) {
         valid_buffer_usage &= ~(VK_BUFFER_USAGE_INDEX_BUFFER_BIT |
                                 VK_BUFFER_USAGE_VERTEX_BUFFER_BIT);
      }
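
      /* Concretely (hypothetical addresses): buffers placed at 0x100001000
       * and 0x200001000 have identical low 32 bits and would collide in the
       * VF cache, which is why vertex and index buffers are restricted to
       * the 32-bit heap.
       */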

      if (device->info.has_llc) {
         /* Big core GPUs share LLC with the CPU and thus one memory type can be
          * both cached and coherent at the same time.
          */
         device->memory.types[type_count++] = (struct anv_memory_type) {
            .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
                             VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
                             VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
                             VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
            .heapIndex = heap,
            .valid_buffer_usage = valid_buffer_usage,
         };
      } else {
         /* The spec requires that we expose a host-visible, coherent memory
          * type, but Atom GPUs don't share LLC.  Thus we offer two memory
          * types: one that is cached but not coherent, and one that is
          * coherent but uncached (write-combined).
          */
         device->memory.types[type_count++] = (struct anv_memory_type) {
            .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
                             VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
                             VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
            .heapIndex = heap,
            .valid_buffer_usage = valid_buffer_usage,
         };
         device->memory.types[type_count++] = (struct anv_memory_type) {
            .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
                             VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
                             VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
            .heapIndex = heap,
            .valid_buffer_usage = valid_buffer_usage,
         };
      }
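
      /* Informal guidance (not from the spec): with the two non-LLC types
       * above, an application typically picks the HOST_CACHED type for data
       * it reads back on the CPU and the HOST_COHERENT (write-combined) type
       * for streaming uploads to the GPU.
       */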
   }
   device->memory.type_count = type_count;

   return VK_SUCCESS;
}

static VkResult
anv_physical_device_init_uuids(struct anv_physical_device *device)
{
   const struct build_id_note *note =
      build_id_find_nhdr_for_addr(anv_physical_device_init_uuids);
   if (!note) {
      return vk_errorf(device->instance, device,
                       VK_ERROR_INITIALIZATION_FAILED,
                       "Failed to find build-id");
   }

   unsigned build_id_len = build_id_length(note);
   if (build_id_len < 20) {
      return vk_errorf(device->instance, device,
                       VK_ERROR_INITIALIZATION_FAILED,
                       "build-id too short.  It needs to be a SHA");
   }

   struct mesa_sha1 sha1_ctx;
   uint8_t sha1[20];
   STATIC_ASSERT(VK_UUID_SIZE <= sizeof(sha1));

   /* The pipeline cache UUID is used for determining when a pipeline cache is
    * invalid.  It needs both a driver build and the PCI ID of the device.
    */
   _mesa_sha1_init(&sha1_ctx);
   _mesa_sha1_update(&sha1_ctx, build_id_data(note), build_id_len);
   _mesa_sha1_update(&sha1_ctx, &device->chipset_id,
                     sizeof(device->chipset_id));
   _mesa_sha1_final(&sha1_ctx, sha1);
   memcpy(device->pipeline_cache_uuid, sha1, VK_UUID_SIZE);

   /* The driver UUID is used for determining shareability of images and
    * memory between two Vulkan instances in separate processes.  People who
    * want to share memory need to also check the device UUID (below), so
    * this only needs to be the build-id.
    */
   memcpy(device->driver_uuid, build_id_data(note), VK_UUID_SIZE);

   /* The device UUID uniquely identifies the given device within the machine.
    * Since we never have more than one device, this doesn't need to be a real
    * UUID.  However, on the off-chance that someone tries to use this to
    * cache pre-tiled images or something of the like, we use the PCI ID and
    * some bits of ISL info to ensure that this is safe.
    */
   _mesa_sha1_init(&sha1_ctx);
   _mesa_sha1_update(&sha1_ctx, &device->chipset_id,
                     sizeof(device->chipset_id));
   _mesa_sha1_update(&sha1_ctx, &device->isl_dev.has_bit6_swizzling,
                     sizeof(device->isl_dev.has_bit6_swizzling));
   _mesa_sha1_final(&sha1_ctx, sha1);
   memcpy(device->device_uuid, sha1, VK_UUID_SIZE);

   return VK_SUCCESS;
}

static VkResult
anv_physical_device_init(struct anv_physical_device *device,
                         struct anv_instance *instance,
                         const char *path)
{
   VkResult result;
   int fd;

   brw_process_intel_debug_variable();

   fd = open(path, O_RDWR | O_CLOEXEC);
   if (fd < 0)
      return vk_error(VK_ERROR_INCOMPATIBLE_DRIVER);

   device->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
   device->instance = instance;

   assert(strlen(path) < ARRAY_SIZE(device->path));
   strncpy(device->path, path, ARRAY_SIZE(device->path));

   device->chipset_id = anv_gem_get_param(fd, I915_PARAM_CHIPSET_ID);
   if (!device->chipset_id) {
      result = vk_error(VK_ERROR_INCOMPATIBLE_DRIVER);
      goto fail;
   }

   device->name = gen_get_device_name(device->chipset_id);
   if (!gen_get_device_info(device->chipset_id, &device->info)) {
      result = vk_error(VK_ERROR_INCOMPATIBLE_DRIVER);
      goto fail;
   }

   if (device->info.is_haswell) {
      intel_logw("Haswell Vulkan support is incomplete");
   } else if (device->info.gen == 7 && !device->info.is_baytrail) {
      intel_logw("Ivy Bridge Vulkan support is incomplete");
   } else if (device->info.gen == 7 && device->info.is_baytrail) {
      intel_logw("Bay Trail Vulkan support is incomplete");
   } else if (device->info.gen >= 8 && device->info.gen <= 9) {
      /* Broadwell, Cherryview, Skylake, Broxton, Kabylake, and Coffeelake
       * are as fully supported as anything */
   } else if (device->info.gen == 10) {
      intel_logw("Cannonlake Vulkan support is alpha");
   } else {
      result = vk_errorf(device->instance, device,
                         VK_ERROR_INCOMPATIBLE_DRIVER,
                         "Vulkan not yet supported on %s", device->name);
      goto fail;
   }

   device->cmd_parser_version = -1;
   if (device->info.gen == 7) {
      device->cmd_parser_version =
         anv_gem_get_param(fd, I915_PARAM_CMD_PARSER_VERSION);
      if (device->cmd_parser_version == -1) {
         result = vk_errorf(device->instance, device,
                            VK_ERROR_INITIALIZATION_FAILED,
                            "failed to get command parser version");
         goto fail;
      }
   }

   if (!anv_gem_get_param(fd, I915_PARAM_HAS_WAIT_TIMEOUT)) {
      result = vk_errorf(device->instance, device,
                         VK_ERROR_INITIALIZATION_FAILED,
                         "kernel missing gem wait");
      goto fail;
   }

   if (!anv_gem_get_param(fd, I915_PARAM_HAS_EXECBUF2)) {
      result = vk_errorf(device->instance, device,
                         VK_ERROR_INITIALIZATION_FAILED,
                         "kernel missing execbuf2");
      goto fail;
   }

   if (!device->info.has_llc &&
       anv_gem_get_param(fd, I915_PARAM_MMAP_VERSION) < 1) {
      result = vk_errorf(device->instance, device,
                         VK_ERROR_INITIALIZATION_FAILED,
                         "kernel missing wc mmap");
      goto fail;
   }

   result = anv_physical_device_init_heaps(device, fd);
   if (result != VK_SUCCESS)
      goto fail;

   device->has_exec_async = anv_gem_get_param(fd, I915_PARAM_HAS_EXEC_ASYNC);
   device->has_exec_capture = anv_gem_get_param(fd, I915_PARAM_HAS_EXEC_CAPTURE);
   device->has_exec_fence = anv_gem_get_param(fd, I915_PARAM_HAS_EXEC_FENCE);
   device->has_syncobj = anv_gem_get_param(fd, I915_PARAM_HAS_EXEC_FENCE_ARRAY);
   device->has_syncobj_wait = device->has_syncobj &&
                              anv_gem_supports_syncobj_wait(fd);

   bool swizzled = anv_gem_get_bit6_swizzle(fd, I915_TILING_X);

   /* Starting with Gen10, the timestamp frequency of the command streamer may
    * vary from one part to another. We can query the value from the kernel.
    */
   if (device->info.gen >= 10) {
      int timestamp_frequency =
         anv_gem_get_param(fd, I915_PARAM_CS_TIMESTAMP_FREQUENCY);

      if (timestamp_frequency < 0)
         intel_logw("Kernel 4.16-rc1+ required to properly query CS timestamp frequency");
      else
         device->info.timestamp_frequency = timestamp_frequency;
   }
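
   /* Note: this frequency feeds VkPhysicalDeviceLimits::timestampPeriod
    * below (1e9 / timestamp_frequency); for example, a hypothetical 12 MHz
    * timestamp clock corresponds to a period of roughly 83.3 ns per tick.
    */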

   /* GENs prior to 8 do not support EU/Subslice info */
   if (device->info.gen >= 8) {
      device->subslice_total = anv_gem_get_param(fd, I915_PARAM_SUBSLICE_TOTAL);
      device->eu_total = anv_gem_get_param(fd, I915_PARAM_EU_TOTAL);

      /* Without this information, we cannot get the right Braswell
       * brandstrings, and we have to use conservative numbers for GPGPU on
       * many platforms, but otherwise, things will just work.
       */
      if (device->subslice_total < 1 || device->eu_total < 1) {
         intel_logw("Kernel 4.1 required to properly query GPU properties");
      }
   } else if (device->info.gen == 7) {
      device->subslice_total = 1 << (device->info.gt - 1);
   }

   if (device->info.is_cherryview &&
       device->subslice_total > 0 && device->eu_total > 0) {
      /* Logical CS threads = EUs per subslice * num threads per EU */
      uint32_t max_cs_threads =
         device->eu_total / device->subslice_total * device->info.num_thread_per_eu;

      /* Fuse configurations may give more threads than expected, never less. */
      if (max_cs_threads > device->info.max_cs_threads)
         device->info.max_cs_threads = max_cs_threads;
   }
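
   /* Worked example for the Cherryview adjustment above (hypothetical
    * fusing): 48 EUs over 4 enabled subslices with 7 threads per EU gives
    * 84 logical CS threads, which replaces a smaller table value.
    */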

   device->compiler = brw_compiler_create(NULL, &device->info);
   if (device->compiler == NULL) {
      result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
      goto fail;
   }
   device->compiler->shader_debug_log = compiler_debug_log;
   device->compiler->shader_perf_log = compiler_perf_log;
   device->compiler->supports_pull_constants = false;
   device->compiler->constant_buffer_0_is_relative = true;

   isl_device_init(&device->isl_dev, &device->info, swizzled);

   result = anv_physical_device_init_uuids(device);
   if (result != VK_SUCCESS)
      goto fail;

   result = anv_init_wsi(device);
   if (result != VK_SUCCESS) {
      ralloc_free(device->compiler);
      goto fail;
   }

   device->local_fd = fd;
   return VK_SUCCESS;

fail:
   close(fd);
   return result;
}

static void
anv_physical_device_finish(struct anv_physical_device *device)
{
   anv_finish_wsi(device);
   ralloc_free(device->compiler);
   close(device->local_fd);
}

static void *
default_alloc_func(void *pUserData, size_t size, size_t align,
                   VkSystemAllocationScope allocationScope)
{
   return malloc(size);
}

static void *
default_realloc_func(void *pUserData, void *pOriginal, size_t size,
                     size_t align, VkSystemAllocationScope allocationScope)
{
   return realloc(pOriginal, size);
}

static void
default_free_func(void *pUserData, void *pMemory)
{
   free(pMemory);
}

static const VkAllocationCallbacks default_alloc = {
   .pUserData = NULL,
   .pfnAllocation = default_alloc_func,
   .pfnReallocation = default_realloc_func,
   .pfnFree = default_free_func,
};

VkResult anv_CreateInstance(
    const VkInstanceCreateInfo*                 pCreateInfo,
    const VkAllocationCallbacks*                pAllocator,
    VkInstance*                                 pInstance)
{
   struct anv_instance *instance;

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO);

   /* Check if the user passed a debug report callback to be used during
    * Create/Destroy of the instance.
    */
   const VkDebugReportCallbackCreateInfoEXT *ctor_cb =
      vk_find_struct_const(pCreateInfo->pNext,
                           DEBUG_REPORT_CALLBACK_CREATE_INFO_EXT);

   uint32_t client_version;
   if (pCreateInfo->pApplicationInfo &&
       pCreateInfo->pApplicationInfo->apiVersion != 0) {
      client_version = pCreateInfo->pApplicationInfo->apiVersion;
   } else {
      client_version = VK_MAKE_VERSION(1, 0, 0);
   }

   if (VK_MAKE_VERSION(1, 0, 0) > client_version ||
       client_version > VK_MAKE_VERSION(1, 0, 0xfff)) {

      if (ctor_cb && ctor_cb->flags & VK_DEBUG_REPORT_ERROR_BIT_EXT)
         ctor_cb->pfnCallback(VK_DEBUG_REPORT_ERROR_BIT_EXT,
                              VK_DEBUG_REPORT_OBJECT_TYPE_INSTANCE_EXT,
                              VK_NULL_HANDLE, /* No handle available yet. */
                              __LINE__,
                              0,
                              "anv",
                              "incompatible driver version",
                              ctor_cb->pUserData);

      return vk_errorf(NULL, NULL, VK_ERROR_INCOMPATIBLE_DRIVER,
                       "Client requested version %d.%d.%d",
                       VK_VERSION_MAJOR(client_version),
                       VK_VERSION_MINOR(client_version),
                       VK_VERSION_PATCH(client_version));
   }

   for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) {
      const char *ext_name = pCreateInfo->ppEnabledExtensionNames[i];
      if (!anv_instance_extension_supported(ext_name))
         return vk_error(VK_ERROR_EXTENSION_NOT_PRESENT);
   }

   instance = vk_alloc2(&default_alloc, pAllocator, sizeof(*instance), 8,
                         VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
   if (!instance)
      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

   instance->_loader_data.loaderMagic = ICD_LOADER_MAGIC;

   if (pAllocator)
      instance->alloc = *pAllocator;
   else
      instance->alloc = default_alloc;

   instance->apiVersion = client_version;
   instance->physicalDeviceCount = -1;

   if (pthread_mutex_init(&instance->callbacks_mutex, NULL) != 0) {
      vk_free2(&default_alloc, pAllocator, instance);
      return vk_error(VK_ERROR_INITIALIZATION_FAILED);
   }

   list_inithead(&instance->callbacks);

   /* Store the debug report callback to be used during DestroyInstance. */
   if (ctor_cb) {
      instance->destroy_debug_cb.flags = ctor_cb->flags;
      instance->destroy_debug_cb.callback = ctor_cb->pfnCallback;
      instance->destroy_debug_cb.data = ctor_cb->pUserData;
   }

   _mesa_locale_init();

   VG(VALGRIND_CREATE_MEMPOOL(instance, 0, false));

   *pInstance = anv_instance_to_handle(instance);

   return VK_SUCCESS;
}

void anv_DestroyInstance(
    VkInstance                                  _instance,
    const VkAllocationCallbacks*                pAllocator)
{
   ANV_FROM_HANDLE(anv_instance, instance, _instance);

   if (!instance)
      return;

   if (instance->physicalDeviceCount > 0) {
      /* We support at most one physical device. */
      assert(instance->physicalDeviceCount == 1);
      anv_physical_device_finish(&instance->physicalDevice);
   }

   VG(VALGRIND_DESTROY_MEMPOOL(instance));

   pthread_mutex_destroy(&instance->callbacks_mutex);

   _mesa_locale_fini();

   vk_free(&instance->alloc, instance);
}

static VkResult
anv_enumerate_devices(struct anv_instance *instance)
{
   /* TODO: Check for more devices ? */
   drmDevicePtr devices[8];
   VkResult result = VK_ERROR_INCOMPATIBLE_DRIVER;
   int max_devices;

   instance->physicalDeviceCount = 0;

   max_devices = drmGetDevices2(0, devices, ARRAY_SIZE(devices));
   if (max_devices < 1)
      return VK_ERROR_INCOMPATIBLE_DRIVER;

   for (unsigned i = 0; i < (unsigned)max_devices; i++) {
      if (devices[i]->available_nodes & 1 << DRM_NODE_RENDER &&
          devices[i]->bustype == DRM_BUS_PCI &&
          devices[i]->deviceinfo.pci->vendor_id == 0x8086) {

         result = anv_physical_device_init(&instance->physicalDevice,
                        instance,
                        devices[i]->nodes[DRM_NODE_RENDER]);
         if (result != VK_ERROR_INCOMPATIBLE_DRIVER)
            break;
      }
   }
   drmFreeDevices(devices, max_devices);

   if (result == VK_SUCCESS)
      instance->physicalDeviceCount = 1;

   return result;
}


VkResult anv_EnumeratePhysicalDevices(
    VkInstance                                  _instance,
    uint32_t*                                   pPhysicalDeviceCount,
    VkPhysicalDevice*                           pPhysicalDevices)
{
   ANV_FROM_HANDLE(anv_instance, instance, _instance);
   VK_OUTARRAY_MAKE(out, pPhysicalDevices, pPhysicalDeviceCount);
   VkResult result;

   if (instance->physicalDeviceCount < 0) {
      result = anv_enumerate_devices(instance);
      if (result != VK_SUCCESS &&
          result != VK_ERROR_INCOMPATIBLE_DRIVER)
         return result;
   }

   if (instance->physicalDeviceCount > 0) {
      assert(instance->physicalDeviceCount == 1);
      vk_outarray_append(&out, i) {
         *i = anv_physical_device_to_handle(&instance->physicalDevice);
      }
   }

   return vk_outarray_status(&out);
}

void anv_GetPhysicalDeviceFeatures(
    VkPhysicalDevice                            physicalDevice,
    VkPhysicalDeviceFeatures*                   pFeatures)
{
   ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice);

   *pFeatures = (VkPhysicalDeviceFeatures) {
      .robustBufferAccess                       = true,
      .fullDrawIndexUint32                      = true,
      .imageCubeArray                           = true,
      .independentBlend                         = true,
      .geometryShader                           = true,
      .tessellationShader                       = true,
      .sampleRateShading                        = true,
      .dualSrcBlend                             = true,
      .logicOp                                  = true,
      .multiDrawIndirect                        = true,
      .drawIndirectFirstInstance                = true,
      .depthClamp                               = true,
      .depthBiasClamp                           = true,
      .fillModeNonSolid                         = true,
      .depthBounds                              = false,
      .wideLines                                = true,
      .largePoints                              = true,
      .alphaToOne                               = true,
      .multiViewport                            = true,
      .samplerAnisotropy                        = true,
      .textureCompressionETC2                   = pdevice->info.gen >= 8 ||
                                                  pdevice->info.is_baytrail,
      .textureCompressionASTC_LDR               = pdevice->info.gen >= 9, /* FINISHME CHV */
      .textureCompressionBC                     = true,
      .occlusionQueryPrecise                    = true,
      .pipelineStatisticsQuery                  = true,
      .fragmentStoresAndAtomics                 = true,
      .shaderTessellationAndGeometryPointSize   = true,
      .shaderImageGatherExtended                = true,
      .shaderStorageImageExtendedFormats        = true,
      .shaderStorageImageMultisample            = false,
      .shaderStorageImageReadWithoutFormat      = false,
      .shaderStorageImageWriteWithoutFormat     = true,
      .shaderUniformBufferArrayDynamicIndexing  = true,
      .shaderSampledImageArrayDynamicIndexing   = true,
      .shaderStorageBufferArrayDynamicIndexing  = true,
      .shaderStorageImageArrayDynamicIndexing   = true,
      .shaderClipDistance                       = true,
      .shaderCullDistance                       = true,
      .shaderFloat64                            = pdevice->info.gen >= 8,
      .shaderInt64                              = pdevice->info.gen >= 8,
      .shaderInt16                              = false,
      .shaderResourceMinLod                     = false,
      .variableMultisampleRate                  = false,
      .inheritedQueries                         = true,
   };

   /* We can't do image stores in vec4 shaders */
   pFeatures->vertexPipelineStoresAndAtomics =
      pdevice->compiler->scalar_stage[MESA_SHADER_VERTEX] &&
      pdevice->compiler->scalar_stage[MESA_SHADER_GEOMETRY];
}

void anv_GetPhysicalDeviceFeatures2KHR(
    VkPhysicalDevice                            physicalDevice,
    VkPhysicalDeviceFeatures2KHR*               pFeatures)
{
   anv_GetPhysicalDeviceFeatures(physicalDevice, &pFeatures->features);

   vk_foreach_struct(ext, pFeatures->pNext) {
      switch (ext->sType) {
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_FEATURES_KHX: {
         VkPhysicalDeviceMultiviewFeaturesKHX *features =
            (VkPhysicalDeviceMultiviewFeaturesKHX *)ext;
         features->multiview = true;
         features->multiviewGeometryShader = true;
         features->multiviewTessellationShader = true;
         break;
      }

      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VARIABLE_POINTER_FEATURES_KHR: {
         VkPhysicalDeviceVariablePointerFeaturesKHR *features = (void *)ext;
         features->variablePointersStorageBuffer = true;
         features->variablePointers = true;
         break;
      }

      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SAMPLER_YCBCR_CONVERSION_FEATURES_KHR: {
         VkPhysicalDeviceSamplerYcbcrConversionFeaturesKHR *features =
            (VkPhysicalDeviceSamplerYcbcrConversionFeaturesKHR *) ext;
         features->samplerYcbcrConversion = true;
         break;
      }

      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES_KHR: {
         VkPhysicalDevice16BitStorageFeaturesKHR *features =
            (VkPhysicalDevice16BitStorageFeaturesKHR *)ext;

         features->storageBuffer16BitAccess = false;
         features->uniformAndStorageBuffer16BitAccess = false;
         features->storagePushConstant16 = false;
         features->storageInputOutput16 = false;
         break;
      }

      default:
         anv_debug_ignored_stype(ext->sType);
         break;
      }
   }
}
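
/* Example (application side, illustrative only): one of the extension
 * feature structs handled above is queried by chaining it into the pNext
 * list passed to vkGetPhysicalDeviceFeatures2KHR():
 *
 *    VkPhysicalDeviceMultiviewFeaturesKHX multiview = {
 *       .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_FEATURES_KHX,
 *    };
 *    VkPhysicalDeviceFeatures2KHR features = {
 *       .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2_KHR,
 *       .pNext = &multiview,
 *    };
 *    vkGetPhysicalDeviceFeatures2KHR(physical_device, &features);
 */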

void anv_GetPhysicalDeviceProperties(
    VkPhysicalDevice                            physicalDevice,
    VkPhysicalDeviceProperties*                 pProperties)
{
   ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice);
   const struct gen_device_info *devinfo = &pdevice->info;

   /* See assertions made when programming the buffer surface state. */
   const uint32_t max_raw_buffer_sz = devinfo->gen >= 7 ?
                                      (1ul << 30) : (1ul << 27);

   const uint32_t max_samplers = (devinfo->gen >= 8 || devinfo->is_haswell) ?
                                 128 : 16;

   VkSampleCountFlags sample_counts =
      isl_device_get_sample_counts(&pdevice->isl_dev);

   VkPhysicalDeviceLimits limits = {
      .maxImageDimension1D                      = (1 << 14),
      .maxImageDimension2D                      = (1 << 14),
      .maxImageDimension3D                      = (1 << 11),
      .maxImageDimensionCube                    = (1 << 14),
      .maxImageArrayLayers                      = (1 << 11),
      .maxTexelBufferElements                   = 128 * 1024 * 1024,
      .maxUniformBufferRange                    = (1ul << 27),
      .maxStorageBufferRange                    = max_raw_buffer_sz,
      .maxPushConstantsSize                     = MAX_PUSH_CONSTANTS_SIZE,
      .maxMemoryAllocationCount                 = UINT32_MAX,
      .maxSamplerAllocationCount                = 64 * 1024,
      .bufferImageGranularity                   = 64, /* A cache line */
      .sparseAddressSpaceSize                   = 0,
      .maxBoundDescriptorSets                   = MAX_SETS,
      .maxPerStageDescriptorSamplers            = max_samplers,
      .maxPerStageDescriptorUniformBuffers      = 64,
      .maxPerStageDescriptorStorageBuffers      = 64,
      .maxPerStageDescriptorSampledImages       = max_samplers,
      .maxPerStageDescriptorStorageImages       = 64,
      .maxPerStageDescriptorInputAttachments    = 64,
      .maxPerStageResources                     = 250,
      .maxDescriptorSetSamplers                 = 6 * max_samplers, /* number of stages * maxPerStageDescriptorSamplers */
      .maxDescriptorSetUniformBuffers           = 6 * 64,           /* number of stages * maxPerStageDescriptorUniformBuffers */
      .maxDescriptorSetUniformBuffersDynamic    = MAX_DYNAMIC_BUFFERS / 2,
      .maxDescriptorSetStorageBuffers           = 6 * 64,           /* number of stages * maxPerStageDescriptorStorageBuffers */
      .maxDescriptorSetStorageBuffersDynamic    = MAX_DYNAMIC_BUFFERS / 2,
      .maxDescriptorSetSampledImages            = 6 * max_samplers, /* number of stages * maxPerStageDescriptorSampledImages */
      .maxDescriptorSetStorageImages            = 6 * 64,           /* number of stages * maxPerStageDescriptorStorageImages */
      .maxDescriptorSetInputAttachments         = 256,
      .maxVertexInputAttributes                 = MAX_VBS,
      .maxVertexInputBindings                   = MAX_VBS,
      .maxVertexInputAttributeOffset            = 2047,
      .maxVertexInputBindingStride              = 2048,
      .maxVertexOutputComponents                = 128,
      .maxTessellationGenerationLevel           = 64,
      .maxTessellationPatchSize                 = 32,
      .maxTessellationControlPerVertexInputComponents = 128,
      .maxTessellationControlPerVertexOutputComponents = 128,
      .maxTessellationControlPerPatchOutputComponents = 128,
      .maxTessellationControlTotalOutputComponents = 2048,
      .maxTessellationEvaluationInputComponents = 128,
      .maxTessellationEvaluationOutputComponents = 128,
      .maxGeometryShaderInvocations             = 32,
      .maxGeometryInputComponents               = 64,
      .maxGeometryOutputComponents              = 128,
      .maxGeometryOutputVertices                = 256,
      .maxGeometryTotalOutputComponents         = 1024,
      .maxFragmentInputComponents               = 128,
      .maxFragmentOutputAttachments             = 8,
      .maxFragmentDualSrcAttachments            = 1,
      .maxFragmentCombinedOutputResources       = 8,
      .maxComputeSharedMemorySize               = 32768,
      .maxComputeWorkGroupCount                 = { 65535, 65535, 65535 },
      .maxComputeWorkGroupInvocations           = 16 * devinfo->max_cs_threads,
      .maxComputeWorkGroupSize = {
         16 * devinfo->max_cs_threads,
         16 * devinfo->max_cs_threads,
         16 * devinfo->max_cs_threads,
      },
      .subPixelPrecisionBits                    = 4 /* FIXME */,
      .subTexelPrecisionBits                    = 4 /* FIXME */,
      .mipmapPrecisionBits                      = 4 /* FIXME */,
      .maxDrawIndexedIndexValue                 = UINT32_MAX,
      .maxDrawIndirectCount                     = UINT32_MAX,
      .maxSamplerLodBias                        = 16,
      .maxSamplerAnisotropy                     = 16,
      .maxViewports                             = MAX_VIEWPORTS,
      .maxViewportDimensions                    = { (1 << 14), (1 << 14) },
      .viewportBoundsRange                      = { INT16_MIN, INT16_MAX },
      .viewportSubPixelBits                     = 13, /* We take a float? */
      .minMemoryMapAlignment                    = 4096, /* A page */
      .minTexelBufferOffsetAlignment            = 1,
      /* We need 16 for UBO block reads to work and 32 for push UBOs */
      .minUniformBufferOffsetAlignment          = 32,
      .minStorageBufferOffsetAlignment          = 4,
      .minTexelOffset                           = -8,
      .maxTexelOffset                           = 7,
      .minTexelGatherOffset                     = -32,
      .maxTexelGatherOffset                     = 31,
      .minInterpolationOffset                   = -0.5,
      .maxInterpolationOffset                   = 0.4375,
      .subPixelInterpolationOffsetBits          = 4,
      .maxFramebufferWidth                      = (1 << 14),
      .maxFramebufferHeight                     = (1 << 14),
      .maxFramebufferLayers                     = (1 << 11),
      .framebufferColorSampleCounts             = sample_counts,
      .framebufferDepthSampleCounts             = sample_counts,
      .framebufferStencilSampleCounts           = sample_counts,
      .framebufferNoAttachmentsSampleCounts     = sample_counts,
      .maxColorAttachments                      = MAX_RTS,
      .sampledImageColorSampleCounts            = sample_counts,
      .sampledImageIntegerSampleCounts          = VK_SAMPLE_COUNT_1_BIT,
      .sampledImageDepthSampleCounts            = sample_counts,
      .sampledImageStencilSampleCounts          = sample_counts,
      .storageImageSampleCounts                 = VK_SAMPLE_COUNT_1_BIT,
      .maxSampleMaskWords                       = 1,
      .timestampComputeAndGraphics              = false,
      .timestampPeriod                          = 1000000000.0 / devinfo->timestamp_frequency,
      .maxClipDistances                         = 8,
      .maxCullDistances                         = 8,
      .maxCombinedClipAndCullDistances          = 8,
      .discreteQueuePriorities                  = 1,
      .pointSizeRange                           = { 0.125, 255.875 },
      .lineWidthRange                           = { 0.0, 7.9921875 },
      .pointSizeGranularity                     = (1.0 / 8.0),
      .lineWidthGranularity                     = (1.0 / 128.0),
      .strictLines                              = false, /* FINISHME */
      .standardSampleLocations                  = true,
      .optimalBufferCopyOffsetAlignment         = 128,
      .optimalBufferCopyRowPitchAlignment       = 128,
      .nonCoherentAtomSize                      = 64,
   };

   *pProperties = (VkPhysicalDeviceProperties) {
      .apiVersion = anv_physical_device_api_version(pdevice),
      .driverVersion = vk_get_driver_version(),
      .vendorID = 0x8086,
      .deviceID = pdevice->chipset_id,
      .deviceType = VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU,
      .limits = limits,
      .sparseProperties = {0}, /* Broadwell doesn't do sparse. */
   };

   snprintf(pProperties->deviceName, sizeof(pProperties->deviceName),
            "%s", pdevice->name);
   memcpy(pProperties->pipelineCacheUUID,
          pdevice->pipeline_cache_uuid, VK_UUID_SIZE);
}

void anv_GetPhysicalDeviceProperties2KHR(
    VkPhysicalDevice                            physicalDevice,
    VkPhysicalDeviceProperties2KHR*             pProperties)
{
   ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice);

   anv_GetPhysicalDeviceProperties(physicalDevice, &pProperties->properties);

   vk_foreach_struct(ext, pProperties->pNext) {
      switch (ext->sType) {
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PUSH_DESCRIPTOR_PROPERTIES_KHR: {
         VkPhysicalDevicePushDescriptorPropertiesKHR *properties =
            (VkPhysicalDevicePushDescriptorPropertiesKHR *) ext;

         properties->maxPushDescriptors = MAX_PUSH_DESCRIPTORS;
         break;
      }

      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ID_PROPERTIES_KHR: {
         VkPhysicalDeviceIDPropertiesKHR *id_props =
            (VkPhysicalDeviceIDPropertiesKHR *)ext;
         memcpy(id_props->deviceUUID, pdevice->device_uuid, VK_UUID_SIZE);
         memcpy(id_props->driverUUID, pdevice->driver_uuid, VK_UUID_SIZE);
         /* The LUID is for Windows. */
         id_props->deviceLUIDValid = false;
         break;
      }

      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_PROPERTIES_KHX: {
         VkPhysicalDeviceMultiviewPropertiesKHX *properties =
            (VkPhysicalDeviceMultiviewPropertiesKHX *)ext;
         properties->maxMultiviewViewCount = 16;
         properties->maxMultiviewInstanceIndex = UINT32_MAX / 16;
         break;
      }

      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_POINT_CLIPPING_PROPERTIES_KHR: {
         VkPhysicalDevicePointClippingPropertiesKHR *properties =
            (VkPhysicalDevicePointClippingPropertiesKHR *) ext;
         properties->pointClippingBehavior = VK_POINT_CLIPPING_BEHAVIOR_ALL_CLIP_PLANES_KHR;
         anv_finishme("Implement pop-free point clipping");
         break;
      }

      default:
         anv_debug_ignored_stype(ext->sType);
         break;
      }
   }
}

/* We support exactly one queue family. */
static const VkQueueFamilyProperties
anv_queue_family_properties = {
   .queueFlags = VK_QUEUE_GRAPHICS_BIT |
                 VK_QUEUE_COMPUTE_BIT |
                 VK_QUEUE_TRANSFER_BIT,
   .queueCount = 1,
   .timestampValidBits = 36, /* XXX: Real value here */
   .minImageTransferGranularity = { 1, 1, 1 },
};

void anv_GetPhysicalDeviceQueueFamilyProperties(
    VkPhysicalDevice                            physicalDevice,
    uint32_t*                                   pCount,
    VkQueueFamilyProperties*                    pQueueFamilyProperties)
{
   VK_OUTARRAY_MAKE(out, pQueueFamilyProperties, pCount);

   vk_outarray_append(&out, p) {
      *p = anv_queue_family_properties;
   }
}

void anv_GetPhysicalDeviceQueueFamilyProperties2KHR(
    VkPhysicalDevice                            physicalDevice,
    uint32_t*                                   pQueueFamilyPropertyCount,
    VkQueueFamilyProperties2KHR*                pQueueFamilyProperties)
{
   VK_OUTARRAY_MAKE(out, pQueueFamilyProperties, pQueueFamilyPropertyCount);

   vk_outarray_append(&out, p) {
      p->queueFamilyProperties = anv_queue_family_properties;

      vk_foreach_struct(s, p->pNext) {
         anv_debug_ignored_stype(s->sType);
      }
   }
}