annotate gcc/config/gcn/gcn-run.c @ 145:1830386684a0

gcc-9.2.0
author anatofuz
date Thu, 13 Feb 2020 11:34:05 +0900
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
145
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1 /* Run a stand-alone AMD GCN kernel.
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
2
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
3 Copyright 2017 Mentor Graphics Corporation
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
4 Copyright (C) 2018-2020 Free Software Foundation, Inc.
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
5
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
6 This program is free software: you can redistribute it and/or modify
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
7 it under the terms of the GNU General Public License as published by
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
8 the Free Software Foundation, either version 3 of the License, or
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
9 (at your option) any later version.
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
10
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
11 This program is distributed in the hope that it will be useful,
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
14 GNU General Public License for more details.
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
15
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
16 You should have received a copy of the GNU General Public License
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
17 along with this program. If not, see <http://www.gnu.org/licenses/>. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
18
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
19 /* This program will run a compiled stand-alone GCN kernel on a GPU.
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
20
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
21 The kernel entry point's signature must use a standard main signature:
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
22
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
23 int main(int argc, char **argv)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
24 */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
25
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
26 #include <stdint.h>
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
27 #include <stdbool.h>
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
28 #include <stdlib.h>
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
29 #include <malloc.h>
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
30 #include <stdio.h>
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
31 #include <string.h>
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
32 #include <dlfcn.h>
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
33 #include <unistd.h>
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
34 #include <elf.h>
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
35 #include <signal.h>
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
36
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
37 /* These probably won't be in elf.h for a while. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
38 #ifndef R_AMDGPU_NONE
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
39 #define R_AMDGPU_NONE 0
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
40 #define R_AMDGPU_ABS32_LO 1 /* (S + A) & 0xFFFFFFFF */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
41 #define R_AMDGPU_ABS32_HI 2 /* (S + A) >> 32 */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
42 #define R_AMDGPU_ABS64 3 /* S + A */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
43 #define R_AMDGPU_REL32 4 /* S + A - P */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
44 #define R_AMDGPU_REL64 5 /* S + A - P */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
45 #define R_AMDGPU_ABS32 6 /* S + A */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
46 #define R_AMDGPU_GOTPCREL 7 /* G + GOT + A - P */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
47 #define R_AMDGPU_GOTPCREL32_LO 8 /* (G + GOT + A - P) & 0xFFFFFFFF */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
48 #define R_AMDGPU_GOTPCREL32_HI 9 /* (G + GOT + A - P) >> 32 */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
49 #define R_AMDGPU_REL32_LO 10 /* (S + A - P) & 0xFFFFFFFF */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
50 #define R_AMDGPU_REL32_HI 11 /* (S + A - P) >> 32 */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
51 #define reserved 12
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
52 #define R_AMDGPU_RELATIVE64 13 /* B + A */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
53 #endif
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
54
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
55 #include "hsa.h"
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
56
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
57 #ifndef HSA_RUNTIME_LIB
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
58 #define HSA_RUNTIME_LIB "libhsa-runtime64.so"
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
59 #endif
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
60
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
61 #ifndef VERSION_STRING
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
62 #define VERSION_STRING "(version unknown)"
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
63 #endif
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
64
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
65 bool debug = false;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
66
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
67 hsa_agent_t device = { 0 };
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
68 hsa_queue_t *queue = NULL;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
69 uint64_t init_array_kernel = 0;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
70 uint64_t fini_array_kernel = 0;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
71 uint64_t main_kernel = 0;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
72 hsa_executable_t executable = { 0 };
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
73
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
74 hsa_region_t kernargs_region = { 0 };
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
75 hsa_region_t heap_region = { 0 };
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
76 uint32_t kernarg_segment_size = 0;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
77 uint32_t group_segment_size = 0;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
78 uint32_t private_segment_size = 0;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
79
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
80 static void
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
81 usage (const char *progname)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
82 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
83 printf ("Usage: %s [options] kernel [kernel-args]\n\n"
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
84 "Options:\n"
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
85 " --help\n"
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
86 " --version\n"
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
87 " --debug\n", progname);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
88 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
89
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
90 static void
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
91 version (const char *progname)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
92 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
93 printf ("%s " VERSION_STRING "\n", progname);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
94 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
95
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
96 /* As an HSA runtime is dlopened, following structure defines the necessary
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
97 function pointers.
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
98 Code adapted from libgomp. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
99
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
100 struct hsa_runtime_fn_info
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
101 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
102 /* HSA runtime. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
103 hsa_status_t (*hsa_status_string_fn) (hsa_status_t status,
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
104 const char **status_string);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
105 hsa_status_t (*hsa_agent_get_info_fn) (hsa_agent_t agent,
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
106 hsa_agent_info_t attribute,
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
107 void *value);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
108 hsa_status_t (*hsa_init_fn) (void);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
109 hsa_status_t (*hsa_iterate_agents_fn)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
110 (hsa_status_t (*callback) (hsa_agent_t agent, void *data), void *data);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
111 hsa_status_t (*hsa_region_get_info_fn) (hsa_region_t region,
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
112 hsa_region_info_t attribute,
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
113 void *value);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
114 hsa_status_t (*hsa_queue_create_fn)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
115 (hsa_agent_t agent, uint32_t size, hsa_queue_type_t type,
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
116 void (*callback) (hsa_status_t status, hsa_queue_t *source, void *data),
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
117 void *data, uint32_t private_segment_size,
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
118 uint32_t group_segment_size, hsa_queue_t **queue);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
119 hsa_status_t (*hsa_agent_iterate_regions_fn)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
120 (hsa_agent_t agent,
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
121 hsa_status_t (*callback) (hsa_region_t region, void *data), void *data);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
122 hsa_status_t (*hsa_executable_destroy_fn) (hsa_executable_t executable);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
123 hsa_status_t (*hsa_executable_create_fn)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
124 (hsa_profile_t profile, hsa_executable_state_t executable_state,
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
125 const char *options, hsa_executable_t *executable);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
126 hsa_status_t (*hsa_executable_global_variable_define_fn)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
127 (hsa_executable_t executable, const char *variable_name, void *address);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
128 hsa_status_t (*hsa_executable_load_code_object_fn)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
129 (hsa_executable_t executable, hsa_agent_t agent,
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
130 hsa_code_object_t code_object, const char *options);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
131 hsa_status_t (*hsa_executable_freeze_fn) (hsa_executable_t executable,
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
132 const char *options);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
133 hsa_status_t (*hsa_signal_create_fn) (hsa_signal_value_t initial_value,
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
134 uint32_t num_consumers,
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
135 const hsa_agent_t *consumers,
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
136 hsa_signal_t *signal);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
137 hsa_status_t (*hsa_memory_allocate_fn) (hsa_region_t region, size_t size,
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
138 void **ptr);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
139 hsa_status_t (*hsa_memory_assign_agent_fn) (void *ptr, hsa_agent_t agent,
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
140 hsa_access_permission_t access);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
141 hsa_status_t (*hsa_memory_copy_fn) (void *dst, const void *src,
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
142 size_t size);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
143 hsa_status_t (*hsa_memory_free_fn) (void *ptr);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
144 hsa_status_t (*hsa_signal_destroy_fn) (hsa_signal_t signal);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
145 hsa_status_t (*hsa_executable_get_symbol_fn)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
146 (hsa_executable_t executable, const char *module_name,
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
147 const char *symbol_name, hsa_agent_t agent, int32_t call_convention,
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
148 hsa_executable_symbol_t *symbol);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
149 hsa_status_t (*hsa_executable_symbol_get_info_fn)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
150 (hsa_executable_symbol_t executable_symbol,
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
151 hsa_executable_symbol_info_t attribute, void *value);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
152 void (*hsa_signal_store_relaxed_fn) (hsa_signal_t signal,
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
153 hsa_signal_value_t value);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
154 hsa_signal_value_t (*hsa_signal_wait_acquire_fn)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
155 (hsa_signal_t signal, hsa_signal_condition_t condition,
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
156 hsa_signal_value_t compare_value, uint64_t timeout_hint,
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
157 hsa_wait_state_t wait_state_hint);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
158 hsa_signal_value_t (*hsa_signal_wait_relaxed_fn)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
159 (hsa_signal_t signal, hsa_signal_condition_t condition,
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
160 hsa_signal_value_t compare_value, uint64_t timeout_hint,
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
161 hsa_wait_state_t wait_state_hint);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
162 hsa_status_t (*hsa_queue_destroy_fn) (hsa_queue_t *queue);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
163 hsa_status_t (*hsa_code_object_deserialize_fn)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
164 (void *serialized_code_object, size_t serialized_code_object_size,
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
165 const char *options, hsa_code_object_t *code_object);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
166 uint64_t (*hsa_queue_load_write_index_relaxed_fn)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
167 (const hsa_queue_t *queue);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
168 void (*hsa_queue_store_write_index_relaxed_fn)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
169 (const hsa_queue_t *queue, uint64_t value);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
170 hsa_status_t (*hsa_shut_down_fn) ();
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
171 };
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
172
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
173 /* HSA runtime functions that are initialized in init_hsa_context.
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
174 Code adapted from libgomp. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
175
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
176 static struct hsa_runtime_fn_info hsa_fns;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
177
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
178 #define DLSYM_FN(function) \
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
179 *(void**)(&hsa_fns.function##_fn) = dlsym (handle, #function); \
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
180 if (hsa_fns.function##_fn == NULL) \
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
181 goto fail;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
182
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
183 static void
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
184 init_hsa_runtime_functions (void)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
185 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
186 void *handle = dlopen (HSA_RUNTIME_LIB, RTLD_LAZY);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
187 if (handle == NULL)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
188 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
189 fprintf (stderr,
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
190 "The HSA runtime is required to run GCN kernels on hardware.\n"
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
191 "%s: File not found or could not be opened\n",
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
192 HSA_RUNTIME_LIB);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
193 exit (1);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
194 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
195
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
196 DLSYM_FN (hsa_status_string)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
197 DLSYM_FN (hsa_agent_get_info)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
198 DLSYM_FN (hsa_init)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
199 DLSYM_FN (hsa_iterate_agents)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
200 DLSYM_FN (hsa_region_get_info)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
201 DLSYM_FN (hsa_queue_create)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
202 DLSYM_FN (hsa_agent_iterate_regions)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
203 DLSYM_FN (hsa_executable_destroy)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
204 DLSYM_FN (hsa_executable_create)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
205 DLSYM_FN (hsa_executable_global_variable_define)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
206 DLSYM_FN (hsa_executable_load_code_object)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
207 DLSYM_FN (hsa_executable_freeze)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
208 DLSYM_FN (hsa_signal_create)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
209 DLSYM_FN (hsa_memory_allocate)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
210 DLSYM_FN (hsa_memory_assign_agent)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
211 DLSYM_FN (hsa_memory_copy)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
212 DLSYM_FN (hsa_memory_free)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
213 DLSYM_FN (hsa_signal_destroy)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
214 DLSYM_FN (hsa_executable_get_symbol)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
215 DLSYM_FN (hsa_executable_symbol_get_info)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
216 DLSYM_FN (hsa_signal_wait_acquire)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
217 DLSYM_FN (hsa_signal_wait_relaxed)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
218 DLSYM_FN (hsa_signal_store_relaxed)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
219 DLSYM_FN (hsa_queue_destroy)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
220 DLSYM_FN (hsa_code_object_deserialize)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
221 DLSYM_FN (hsa_queue_load_write_index_relaxed)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
222 DLSYM_FN (hsa_queue_store_write_index_relaxed)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
223 DLSYM_FN (hsa_shut_down)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
224
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
225 return;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
226
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
227 fail:
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
228 fprintf (stderr, "Failed to find HSA functions in " HSA_RUNTIME_LIB "\n");
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
229 exit (1);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
230 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
231
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
232 #undef DLSYM_FN
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
233
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
234 /* Report a fatal error STR together with the HSA error corresponding to
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
235 STATUS and terminate execution of the current process. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
236
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
237 static void
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
238 hsa_fatal (const char *str, hsa_status_t status)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
239 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
240 const char *hsa_error_msg;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
241 hsa_fns.hsa_status_string_fn (status, &hsa_error_msg);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
242 fprintf (stderr, "%s: FAILED\nHSA Runtime message: %s\n", str,
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
243 hsa_error_msg);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
244 exit (1);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
245 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
246
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
247 /* Helper macros to ensure we check the return values from the HSA Runtime.
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
248 These just keep the rest of the code a bit cleaner. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
249
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
250 #define XHSA_CMP(FN, CMP, MSG) \
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
251 do { \
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
252 hsa_status_t status = (FN); \
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
253 if (!(CMP)) \
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
254 hsa_fatal ((MSG), status); \
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
255 else if (debug) \
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
256 fprintf (stderr, "%s: OK\n", (MSG)); \
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
257 } while (0)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
258 #define XHSA(FN, MSG) XHSA_CMP(FN, status == HSA_STATUS_SUCCESS, MSG)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
259
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
260 /* Callback of hsa_iterate_agents.
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
261 Called once for each available device, and returns "break" when a
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
262 suitable one has been found. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
263
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
264 static hsa_status_t
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
265 get_gpu_agent (hsa_agent_t agent, void *data __attribute__ ((unused)))
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
266 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
267 hsa_device_type_t device_type;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
268 XHSA (hsa_fns.hsa_agent_get_info_fn (agent, HSA_AGENT_INFO_DEVICE,
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
269 &device_type),
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
270 "Get agent type");
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
271
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
272 /* Select only GPU devices. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
273 /* TODO: support selecting from multiple GPUs. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
274 if (HSA_DEVICE_TYPE_GPU == device_type)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
275 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
276 device = agent;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
277 return HSA_STATUS_INFO_BREAK;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
278 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
279
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
280 /* The device was not suitable. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
281 return HSA_STATUS_SUCCESS;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
282 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
283
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
284 /* Callback of hsa_iterate_regions.
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
285 Called once for each available memory region, and returns "break" when a
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
286 suitable one has been found. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
287
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
288 static hsa_status_t
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
289 get_memory_region (hsa_region_t region, hsa_region_t *retval,
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
290 hsa_region_global_flag_t kind)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
291 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
292 /* Reject non-global regions. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
293 hsa_region_segment_t segment;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
294 hsa_fns.hsa_region_get_info_fn (region, HSA_REGION_INFO_SEGMENT, &segment);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
295 if (HSA_REGION_SEGMENT_GLOBAL != segment)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
296 return HSA_STATUS_SUCCESS;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
297
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
298 /* Find a region with the KERNARG flag set. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
299 hsa_region_global_flag_t flags;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
300 hsa_fns.hsa_region_get_info_fn (region, HSA_REGION_INFO_GLOBAL_FLAGS,
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
301 &flags);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
302 if (flags & kind)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
303 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
304 *retval = region;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
305 return HSA_STATUS_INFO_BREAK;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
306 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
307
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
308 /* The region was not suitable. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
309 return HSA_STATUS_SUCCESS;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
310 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
311
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
312 static hsa_status_t
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
313 get_kernarg_region (hsa_region_t region, void *data __attribute__((unused)))
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
314 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
315 return get_memory_region (region, &kernargs_region,
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
316 HSA_REGION_GLOBAL_FLAG_KERNARG);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
317 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
318
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
319 static hsa_status_t
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
320 get_heap_region (hsa_region_t region, void *data __attribute__((unused)))
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
321 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
322 return get_memory_region (region, &heap_region,
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
323 HSA_REGION_GLOBAL_FLAG_COARSE_GRAINED);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
324 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
325
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
326 /* Initialize the HSA Runtime library and GPU device. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
327
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
328 static void
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
329 init_device ()
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
330 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
331 /* Load the shared library and find the API functions. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
332 init_hsa_runtime_functions ();
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
333
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
334 /* Initialize the HSA Runtime. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
335 XHSA (hsa_fns.hsa_init_fn (),
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
336 "Initialize run-time");
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
337
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
338 /* Select a suitable device.
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
339 The call-back function, get_gpu_agent, does the selection. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
340 XHSA_CMP (hsa_fns.hsa_iterate_agents_fn (get_gpu_agent, NULL),
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
341 status == HSA_STATUS_SUCCESS || status == HSA_STATUS_INFO_BREAK,
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
342 "Find a device");
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
343
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
344 /* Initialize the queue used for launching kernels. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
345 uint32_t queue_size = 0;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
346 XHSA (hsa_fns.hsa_agent_get_info_fn (device, HSA_AGENT_INFO_QUEUE_MAX_SIZE,
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
347 &queue_size),
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
348 "Find max queue size");
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
349 XHSA (hsa_fns.hsa_queue_create_fn (device, queue_size,
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
350 HSA_QUEUE_TYPE_SINGLE, NULL,
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
351 NULL, UINT32_MAX, UINT32_MAX, &queue),
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
352 "Set up a device queue");
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
353
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
354 /* Select a memory region for the kernel arguments.
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
355 The call-back function, get_kernarg_region, does the selection. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
356 XHSA_CMP (hsa_fns.hsa_agent_iterate_regions_fn (device, get_kernarg_region,
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
357 NULL),
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
358 status == HSA_STATUS_SUCCESS || status == HSA_STATUS_INFO_BREAK,
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
359 "Locate kernargs memory");
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
360
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
361 /* Select a memory region for the kernel heap.
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
362 The call-back function, get_heap_region, does the selection. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
363 XHSA_CMP (hsa_fns.hsa_agent_iterate_regions_fn (device, get_heap_region,
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
364 NULL),
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
365 status == HSA_STATUS_SUCCESS || status == HSA_STATUS_INFO_BREAK,
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
366 "Locate device memory");
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
367 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
368
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
369
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
370 /* Read a whole input file.
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
371 Code copied from mkoffload. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
372
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
373 static char *
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
374 read_file (const char *filename, size_t *plen)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
375 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
376 size_t alloc = 16384;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
377 size_t base = 0;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
378 char *buffer;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
379
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
380 FILE *stream = fopen (filename, "rb");
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
381 if (!stream)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
382 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
383 perror (filename);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
384 exit (1);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
385 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
386
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
387 if (!fseek (stream, 0, SEEK_END))
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
388 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
389 /* Get the file size. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
390 long s = ftell (stream);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
391 if (s >= 0)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
392 alloc = s + 100;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
393 fseek (stream, 0, SEEK_SET);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
394 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
395 buffer = malloc (alloc);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
396
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
397 for (;;)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
398 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
399 size_t n = fread (buffer + base, 1, alloc - base - 1, stream);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
400
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
401 if (!n)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
402 break;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
403 base += n;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
404 if (base + 1 == alloc)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
405 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
406 alloc *= 2;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
407 buffer = realloc (buffer, alloc);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
408 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
409 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
410 buffer[base] = 0;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
411 *plen = base;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
412
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
413 fclose (stream);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
414
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
415 return buffer;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
416 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
417
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
418 /* Read a HSA Code Object (HSACO) from file, and load it into the device. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
419
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
420 static void
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
421 load_image (const char *filename)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
422 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
423 size_t image_size;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
424 Elf64_Ehdr *image = (void *) read_file (filename, &image_size);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
425
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
426 /* An "executable" consists of one or more code objects. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
427 XHSA (hsa_fns.hsa_executable_create_fn (HSA_PROFILE_FULL,
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
428 HSA_EXECUTABLE_STATE_UNFROZEN, "",
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
429 &executable),
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
430 "Initialize GCN executable");
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
431
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
432 /* Hide relocations from the HSA runtime loader.
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
433 Keep a copy of the unmodified section headers to use later. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
434 Elf64_Shdr *image_sections =
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
435 (Elf64_Shdr *) ((char *) image + image->e_shoff);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
436 Elf64_Shdr *sections = malloc (sizeof (Elf64_Shdr) * image->e_shnum);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
437 memcpy (sections, image_sections, sizeof (Elf64_Shdr) * image->e_shnum);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
438 for (int i = image->e_shnum - 1; i >= 0; i--)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
439 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
440 if (image_sections[i].sh_type == SHT_RELA
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
441 || image_sections[i].sh_type == SHT_REL)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
442 /* Change section type to something harmless. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
443 image_sections[i].sh_type = SHT_NOTE;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
444 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
445
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
446 /* Add the HSACO to the executable. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
447 hsa_code_object_t co = { 0 };
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
448 XHSA (hsa_fns.hsa_code_object_deserialize_fn (image, image_size, NULL, &co),
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
449 "Deserialize GCN code object");
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
450 XHSA (hsa_fns.hsa_executable_load_code_object_fn (executable, device, co,
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
451 ""),
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
452 "Load GCN code object");
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
453
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
454 /* We're done modifying he executable. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
455 XHSA (hsa_fns.hsa_executable_freeze_fn (executable, ""),
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
456 "Freeze GCN executable");
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
457
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
458 /* Locate the "_init_array" function, and read the kernel's properties. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
459 hsa_executable_symbol_t symbol;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
460 XHSA (hsa_fns.hsa_executable_get_symbol_fn (executable, NULL, "_init_array",
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
461 device, 0, &symbol),
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
462 "Find '_init_array' function");
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
463 XHSA (hsa_fns.hsa_executable_symbol_get_info_fn
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
464 (symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT, &init_array_kernel),
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
465 "Extract '_init_array' kernel object kernel object");
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
466
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
467 /* Locate the "_fini_array" function, and read the kernel's properties. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
468 XHSA (hsa_fns.hsa_executable_get_symbol_fn (executable, NULL, "_fini_array",
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
469 device, 0, &symbol),
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
470 "Find '_fini_array' function");
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
471 XHSA (hsa_fns.hsa_executable_symbol_get_info_fn
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
472 (symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT, &fini_array_kernel),
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
473 "Extract '_fini_array' kernel object kernel object");
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
474
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
475 /* Locate the "main" function, and read the kernel's properties. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
476 XHSA (hsa_fns.hsa_executable_get_symbol_fn (executable, NULL, "main",
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
477 device, 0, &symbol),
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
478 "Find 'main' function");
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
479 XHSA (hsa_fns.hsa_executable_symbol_get_info_fn
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
480 (symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT, &main_kernel),
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
481 "Extract 'main' kernel object");
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
482 XHSA (hsa_fns.hsa_executable_symbol_get_info_fn
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
483 (symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_SIZE,
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
484 &kernarg_segment_size),
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
485 "Extract kernarg segment size");
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
486 XHSA (hsa_fns.hsa_executable_symbol_get_info_fn
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
487 (symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_GROUP_SEGMENT_SIZE,
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
488 &group_segment_size),
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
489 "Extract group segment size");
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
490 XHSA (hsa_fns.hsa_executable_symbol_get_info_fn
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
491 (symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_PRIVATE_SEGMENT_SIZE,
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
492 &private_segment_size),
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
493 "Extract private segment size");
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
494
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
495 /* Find main function in ELF, and calculate actual load offset. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
496 Elf64_Addr load_offset;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
497 XHSA (hsa_fns.hsa_executable_symbol_get_info_fn
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
498 (symbol, HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_ADDRESS,
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
499 &load_offset),
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
500 "Extract 'main' symbol address");
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
501 for (int i = 0; i < image->e_shnum; i++)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
502 if (sections[i].sh_type == SHT_SYMTAB)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
503 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
504 Elf64_Shdr *strtab = &sections[sections[i].sh_link];
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
505 char *strings = (char *) image + strtab->sh_offset;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
506
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
507 for (size_t offset = 0;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
508 offset < sections[i].sh_size;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
509 offset += sections[i].sh_entsize)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
510 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
511 Elf64_Sym *sym = (Elf64_Sym *) ((char *) image
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
512 + sections[i].sh_offset + offset);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
513 if (strcmp ("main", strings + sym->st_name) == 0)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
514 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
515 load_offset -= sym->st_value;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
516 goto found_main;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
517 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
518 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
519 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
520 /* We only get here when main was not found.
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
521 This should never happen. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
522 fprintf (stderr, "Error: main function not found.\n");
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
523 abort ();
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
524 found_main:;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
525
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
526 /* Find dynamic symbol table. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
527 Elf64_Shdr *dynsym = NULL;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
528 for (int i = 0; i < image->e_shnum; i++)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
529 if (sections[i].sh_type == SHT_DYNSYM)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
530 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
531 dynsym = &sections[i];
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
532 break;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
533 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
534
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
535 /* Fix up relocations. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
536 for (int i = 0; i < image->e_shnum; i++)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
537 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
538 if (sections[i].sh_type == SHT_RELA)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
539 for (size_t offset = 0;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
540 offset < sections[i].sh_size;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
541 offset += sections[i].sh_entsize)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
542 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
543 Elf64_Rela *reloc = (Elf64_Rela *) ((char *) image
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
544 + sections[i].sh_offset
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
545 + offset);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
546 Elf64_Sym *sym =
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
547 (dynsym
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
548 ? (Elf64_Sym *) ((char *) image
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
549 + dynsym->sh_offset
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
550 + (dynsym->sh_entsize
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
551 * ELF64_R_SYM (reloc->r_info))) : NULL);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
552
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
553 int64_t S = (sym ? sym->st_value : 0);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
554 int64_t P = reloc->r_offset + load_offset;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
555 int64_t A = reloc->r_addend;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
556 int64_t B = load_offset;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
557 int64_t V, size;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
558 switch (ELF64_R_TYPE (reloc->r_info))
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
559 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
560 case R_AMDGPU_ABS32_LO:
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
561 V = (S + A) & 0xFFFFFFFF;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
562 size = 4;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
563 break;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
564 case R_AMDGPU_ABS32_HI:
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
565 V = (S + A) >> 32;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
566 size = 4;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
567 break;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
568 case R_AMDGPU_ABS64:
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
569 V = S + A;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
570 size = 8;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
571 break;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
572 case R_AMDGPU_REL32:
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
573 V = S + A - P;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
574 size = 4;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
575 break;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
576 case R_AMDGPU_REL64:
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
577 /* FIXME
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
578 LLD seems to emit REL64 where the the assembler has ABS64.
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
579 This is clearly wrong because it's not what the compiler
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
580 is expecting. Let's assume, for now, that it's a bug.
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
581 In any case, GCN kernels are always self contained and
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
582 therefore relative relocations will have been resolved
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
583 already, so this should be a safe workaround. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
584 V = S + A /* - P */ ;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
585 size = 8;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
586 break;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
587 case R_AMDGPU_ABS32:
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
588 V = S + A;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
589 size = 4;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
590 break;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
591 /* TODO R_AMDGPU_GOTPCREL */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
592 /* TODO R_AMDGPU_GOTPCREL32_LO */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
593 /* TODO R_AMDGPU_GOTPCREL32_HI */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
594 case R_AMDGPU_REL32_LO:
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
595 V = (S + A - P) & 0xFFFFFFFF;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
596 size = 4;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
597 break;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
598 case R_AMDGPU_REL32_HI:
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
599 V = (S + A - P) >> 32;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
600 size = 4;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
601 break;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
602 case R_AMDGPU_RELATIVE64:
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
603 V = B + A;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
604 size = 8;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
605 break;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
606 default:
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
607 fprintf (stderr, "Error: unsupported relocation type.\n");
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
608 exit (1);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
609 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
610 XHSA (hsa_fns.hsa_memory_copy_fn ((void *) P, &V, size),
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
611 "Fix up relocation");
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
612 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
613 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
614 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
615
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
616 /* Allocate some device memory from the kernargs region.
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
617 The returned address will be 32-bit (with excess zeroed on 64-bit host),
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
618 and accessible via the same address on both host and target (via
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
619 __flat_scalar GCN address space). */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
620
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
621 static void *
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
622 device_malloc (size_t size, hsa_region_t region)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
623 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
624 void *result;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
625 XHSA (hsa_fns.hsa_memory_allocate_fn (region, size, &result),
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
626 "Allocate device memory");
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
627 return result;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
628 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
629
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
630 /* These are the device pointers that will be transferred to the target.
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
631 The HSA Runtime points the kernargs register here.
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
632 They correspond to function signature:
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
633 int main (int argc, char *argv[], int *return_value)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
634 The compiler expects this, for kernel functions, and will
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
635 automatically assign the exit value to *return_value. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
636 struct kernargs
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
637 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
638 /* Kernargs. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
639 int32_t argc;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
640 int64_t argv;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
641 int64_t out_ptr;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
642 int64_t heap_ptr;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
643
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
644 /* Output data. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
645 struct output
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
646 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
647 int return_value;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
648 unsigned int next_output;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
649 struct printf_data
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
650 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
651 int written;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
652 char msg[128];
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
653 int type;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
654 union
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
655 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
656 int64_t ivalue;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
657 double dvalue;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
658 char text[128];
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
659 };
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
660 } queue[1024];
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
661 unsigned int consumed;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
662 } output_data;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
663 };
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
664
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
665 struct heap
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
666 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
667 int64_t size;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
668 char data[0];
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
669 } heap;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
670
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
671 /* Print any console output from the kernel.
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
672 We print all entries from "consumed" to the next entry without a "written"
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
673 flag, or "next_output" is reached. The buffer is circular, but the
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
674 indices are absolute. It is assumed the kernel will stop writing data
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
675 if "next_output" wraps (becomes smaller than "consumed"). */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
676 void
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
677 gomp_print_output (struct kernargs *kernargs, bool final)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
678 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
679 unsigned int limit = (sizeof (kernargs->output_data.queue)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
680 / sizeof (kernargs->output_data.queue[0]));
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
681
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
682 unsigned int from = __atomic_load_n (&kernargs->output_data.consumed,
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
683 __ATOMIC_ACQUIRE);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
684 unsigned int to = kernargs->output_data.next_output;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
685
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
686 if (from > to)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
687 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
688 /* Overflow. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
689 if (final)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
690 printf ("GCN print buffer overflowed.\n");
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
691 return;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
692 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
693
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
694 unsigned int i;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
695 for (i = from; i < to; i++)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
696 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
697 struct printf_data *data = &kernargs->output_data.queue[i%limit];
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
698
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
699 if (!data->written && !final)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
700 break;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
701
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
702 switch (data->type)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
703 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
704 case 0:
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
705 printf ("%.128s%ld\n", data->msg, data->ivalue);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
706 break;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
707 case 1:
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
708 printf ("%.128s%f\n", data->msg, data->dvalue);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
709 break;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
710 case 2:
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
711 printf ("%.128s%.128s\n", data->msg, data->text);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
712 break;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
713 case 3:
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
714 printf ("%.128s%.128s", data->msg, data->text);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
715 break;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
716 default:
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
717 printf ("GCN print buffer error!\n");
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
718 break;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
719 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
720
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
721 data->written = 0;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
722 __atomic_store_n (&kernargs->output_data.consumed, i+1,
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
723 __ATOMIC_RELEASE);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
724 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
725 fflush (stdout);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
726 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
727
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
728 /* Execute an already-loaded kernel on the device. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
729
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
730 static void
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
731 run (uint64_t kernel, void *kernargs)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
732 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
733 /* A "signal" is used to launch and monitor the kernel. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
734 hsa_signal_t signal;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
735 XHSA (hsa_fns.hsa_signal_create_fn (1, 0, NULL, &signal),
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
736 "Create signal");
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
737
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
738 /* Configure for a single-worker kernel. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
739 uint64_t index = hsa_fns.hsa_queue_load_write_index_relaxed_fn (queue);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
740 const uint32_t queueMask = queue->size - 1;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
741 hsa_kernel_dispatch_packet_t *dispatch_packet =
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
742 &(((hsa_kernel_dispatch_packet_t *) (queue->base_address))[index &
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
743 queueMask]);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
744 dispatch_packet->setup |= 3 << HSA_KERNEL_DISPATCH_PACKET_SETUP_DIMENSIONS;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
745 dispatch_packet->workgroup_size_x = (uint16_t) 1;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
746 dispatch_packet->workgroup_size_y = (uint16_t) 64;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
747 dispatch_packet->workgroup_size_z = (uint16_t) 1;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
748 dispatch_packet->grid_size_x = 1;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
749 dispatch_packet->grid_size_y = 64;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
750 dispatch_packet->grid_size_z = 1;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
751 dispatch_packet->completion_signal = signal;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
752 dispatch_packet->kernel_object = kernel;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
753 dispatch_packet->kernarg_address = (void *) kernargs;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
754 dispatch_packet->private_segment_size = private_segment_size;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
755 dispatch_packet->group_segment_size = group_segment_size;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
756
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
757 uint16_t header = 0;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
758 header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
759 header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
760 header |= HSA_PACKET_TYPE_KERNEL_DISPATCH << HSA_PACKET_HEADER_TYPE;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
761
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
762 __atomic_store_n ((uint32_t *) dispatch_packet,
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
763 header | (dispatch_packet->setup << 16),
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
764 __ATOMIC_RELEASE);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
765
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
766 if (debug)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
767 fprintf (stderr, "Launch kernel\n");
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
768
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
769 hsa_fns.hsa_queue_store_write_index_relaxed_fn (queue, index + 1);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
770 hsa_fns.hsa_signal_store_relaxed_fn (queue->doorbell_signal, index);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
771 /* Kernel running ...... */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
772 while (hsa_fns.hsa_signal_wait_relaxed_fn (signal, HSA_SIGNAL_CONDITION_LT,
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
773 1, 1000000,
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
774 HSA_WAIT_STATE_ACTIVE) != 0)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
775 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
776 usleep (10000);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
777 gomp_print_output (kernargs, false);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
778 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
779
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
780 gomp_print_output (kernargs, true);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
781
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
782 if (debug)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
783 fprintf (stderr, "Kernel exited\n");
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
784
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
785 XHSA (hsa_fns.hsa_signal_destroy_fn (signal),
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
786 "Clean up signal");
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
787 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
788
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
789 int
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
790 main (int argc, char *argv[])
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
791 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
792 int kernel_arg = 0;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
793 for (int i = 1; i < argc; i++)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
794 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
795 if (!strcmp (argv[i], "--help"))
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
796 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
797 usage (argv[0]);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
798 return 0;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
799 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
800 else if (!strcmp (argv[i], "--version"))
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
801 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
802 version (argv[0]);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
803 return 0;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
804 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
805 else if (!strcmp (argv[i], "--debug"))
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
806 debug = true;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
807 else if (argv[i][0] == '-')
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
808 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
809 usage (argv[0]);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
810 return 1;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
811 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
812 else
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
813 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
814 kernel_arg = i;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
815 break;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
816 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
817 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
818
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
819 if (!kernel_arg)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
820 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
821 /* No kernel arguments were found. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
822 usage (argv[0]);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
823 return 1;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
824 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
825
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
826 /* The remaining arguments are for the GCN kernel. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
827 int kernel_argc = argc - kernel_arg;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
828 char **kernel_argv = &argv[kernel_arg];
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
829
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
830 init_device ();
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
831 load_image (kernel_argv[0]);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
832
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
833 /* Calculate size of function parameters + argv data. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
834 size_t args_size = 0;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
835 for (int i = 0; i < kernel_argc; i++)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
836 args_size += strlen (kernel_argv[i]) + 1;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
837
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
838 /* Allocate device memory for both function parameters and the argv
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
839 data. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
840 struct kernargs *kernargs = device_malloc (sizeof (*kernargs),
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
841 kernargs_region);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
842 struct argdata
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
843 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
844 int64_t argv_data[kernel_argc];
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
845 char strings[args_size];
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
846 } *args = device_malloc (sizeof (struct argdata), kernargs_region);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
847
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
848 size_t heap_size = 10 * 1024 * 1024; /* 10MB. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
849 struct heap *heap = device_malloc (heap_size, heap_region);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
850 XHSA (hsa_fns.hsa_memory_assign_agent_fn (heap, device,
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
851 HSA_ACCESS_PERMISSION_RW),
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
852 "Assign heap to device agent");
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
853
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
854 /* Write the data to the target. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
855 kernargs->argc = kernel_argc;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
856 kernargs->argv = (int64_t) args->argv_data;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
857 kernargs->out_ptr = (int64_t) &kernargs->output_data;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
858 kernargs->output_data.return_value = 0xcafe0000; /* Default return value. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
859 kernargs->output_data.next_output = 0;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
860 for (unsigned i = 0; i < (sizeof (kernargs->output_data.queue)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
861 / sizeof (kernargs->output_data.queue[0])); i++)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
862 kernargs->output_data.queue[i].written = 0;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
863 kernargs->output_data.consumed = 0;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
864 int offset = 0;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
865 for (int i = 0; i < kernel_argc; i++)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
866 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
867 size_t arg_len = strlen (kernel_argv[i]) + 1;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
868 args->argv_data[i] = (int64_t) &args->strings[offset];
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
869 memcpy (&args->strings[offset], kernel_argv[i], arg_len + 1);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
870 offset += arg_len;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
871 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
872 kernargs->heap_ptr = (int64_t) heap;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
873 hsa_fns.hsa_memory_copy_fn (&heap->size, &heap_size, sizeof (heap_size));
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
874
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
875 /* Run constructors on the GPU. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
876 run (init_array_kernel, kernargs);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
877
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
878 /* Run the kernel on the GPU. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
879 run (main_kernel, kernargs);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
880 unsigned int return_value =
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
881 (unsigned int) kernargs->output_data.return_value;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
882
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
883 /* Run destructors on the GPU. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
884 run (fini_array_kernel, kernargs);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
885
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
886 unsigned int upper = (return_value & ~0xffff) >> 16;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
887 if (upper == 0xcafe)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
888 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
889 printf ("Kernel exit value was never set\n");
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
890 return_value = 0xff;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
891 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
892 else if (upper == 0xffff)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
893 ; /* Set by exit. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
894 else if (upper == 0)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
895 ; /* Set by return from main. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
896 else
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
897 printf ("Possible kernel exit value corruption, 2 most significant bytes "
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
898 "aren't 0xffff, 0xcafe, or 0: 0x%x\n", return_value);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
899
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
900 if (upper == 0xffff)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
901 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
902 unsigned int signal = (return_value >> 8) & 0xff;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
903 if (signal == SIGABRT)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
904 printf ("Kernel aborted\n");
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
905 else if (signal != 0)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
906 printf ("Kernel received unkown signal\n");
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
907 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
908
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
909 if (debug)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
910 printf ("Kernel exit value: %d\n", return_value & 0xff);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
911
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
912 /* Clean shut down. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
913 XHSA (hsa_fns.hsa_memory_free_fn (kernargs),
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
914 "Clean up device memory");
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
915 XHSA (hsa_fns.hsa_executable_destroy_fn (executable),
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
916 "Clean up GCN executable");
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
917 XHSA (hsa_fns.hsa_queue_destroy_fn (queue),
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
918 "Clean up device queue");
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
919 XHSA (hsa_fns.hsa_shut_down_fn (),
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
920 "Shut down run-time");
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
921
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
922 return return_value & 0xff;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
923 }