{
struct nvkm_device *device = gr->base.engine.subdev.device;
u32 data[6] = {}, data2[2] = {};
- u8 tpcnr[GPC_MAX];
u8 shift, ntpcv;
- int gpc, tpc, i;
-
- /* calculate first set of magics */
- memcpy(tpcnr, gr->tpc_nr, sizeof(gr->tpc_nr));
-
- gpc = -1;
- for (tpc = 0; tpc < gr->tpc_total; tpc++) {
- do {
- gpc = (gpc + 1) % gr->gpc_nr;
- } while (!tpcnr[gpc]);
- tpcnr[gpc]--;
-
- data[tpc / 6] |= gpc << ((tpc % 6) * 5);
- }
+ int i;
- for (; tpc < 32; tpc++)
- data[tpc / 6] |= 7 << ((tpc % 6) * 5);
+ /* Pack tile map into register format. */
+ for (i = 0; i < 32; i++)
+ data[i / 6] |= (gr->tile[i] & 0x07) << ((i % 6) * 5);
- /* and the second... */
+ /* Magic. */
shift = 0;
ntpcv = gr->tpc_total;
while (!(ntpcv & (1 << 4))) {
{
struct nvkm_device *device = gr->base.engine.subdev.device;
u32 data[6] = {}, data2[2] = {};
- u8 tpcnr[GPC_MAX];
u8 shift, ntpcv;
- int gpc, tpc, i;
-
- /* calculate first set of magics */
- memcpy(tpcnr, gr->tpc_nr, sizeof(gr->tpc_nr));
-
- gpc = -1;
- for (tpc = 0; tpc < gr->tpc_total; tpc++) {
- do {
- gpc = (gpc + 1) % gr->gpc_nr;
- } while (!tpcnr[gpc]);
- tpcnr[gpc]--;
-
- data[tpc / 6] |= gpc << ((tpc % 6) * 5);
- }
+ int i;
- for (; tpc < 32; tpc++)
- data[tpc / 6] |= 7 << ((tpc % 6) * 5);
+ /* Pack tile map into register format. */
+ for (i = 0; i < 32; i++)
+ data[i / 6] |= (gr->tile[i] & 0x07) << ((i % 6) * 5);
- /* and the second... */
+ /* Magic. */
shift = 0;
ntpcv = gr->tpc_total;
while (!(ntpcv & (1 << 4))) {
return ret;
}
+void
+gf100_gr_oneinit_tiles(struct gf100_gr *gr)
+{
+ static const u8 primes[] = {
+ 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61
+ };
+ int init_frac[GPC_MAX], init_err[GPC_MAX], run_err[GPC_MAX], i, j;
+ u32 mul_factor, comm_denom;
+ u8 gpc_map[GPC_MAX];
+ bool sorted;
+
+ switch (gr->tpc_total) {
+ case 15: gr->screen_tile_row_offset = 0x06; break;
+ case 14: gr->screen_tile_row_offset = 0x05; break;
+ case 13: gr->screen_tile_row_offset = 0x02; break;
+ case 11: gr->screen_tile_row_offset = 0x07; break;
+ case 10: gr->screen_tile_row_offset = 0x06; break;
+ case 7:
+ case 5: gr->screen_tile_row_offset = 0x01; break;
+ case 3: gr->screen_tile_row_offset = 0x02; break;
+ case 2:
+ case 1: gr->screen_tile_row_offset = 0x01; break;
+ default: gr->screen_tile_row_offset = 0x03;
+ for (i = 0; i < ARRAY_SIZE(primes); i++) {
+ if (gr->tpc_total % primes[i]) {
+ gr->screen_tile_row_offset = primes[i];
+ break;
+ }
+ }
+ break;
+ }
+
+ /* Sort GPCs by TPC count, highest-to-lowest. */
+ for (i = 0; i < gr->gpc_nr; i++)
+ gpc_map[i] = i;
+ sorted = false;
+
+ while (!sorted) {
+ for (sorted = true, i = 0; i < gr->gpc_nr - 1; i++) {
+ if (gr->tpc_nr[gpc_map[i + 1]] >
+ gr->tpc_nr[gpc_map[i + 0]]) {
+ u8 swap = gpc_map[i];
+ gpc_map[i + 0] = gpc_map[i + 1];
+ gpc_map[i + 1] = swap;
+ sorted = false;
+ }
+ }
+ }
+
+ /* Determine tile->GPC mapping */
+ mul_factor = gr->gpc_nr * gr->tpc_max;
+ if (mul_factor & 1)
+ mul_factor = 2;
+ else
+ mul_factor = 1;
+
+ comm_denom = gr->gpc_nr * gr->tpc_max * mul_factor;
+
+ for (i = 0; i < gr->gpc_nr; i++) {
+ init_frac[i] = gr->tpc_nr[gpc_map[i]] * gr->gpc_nr * mul_factor;
+ init_err[i] = i * gr->tpc_max * mul_factor - comm_denom/2;
+ run_err[i] = init_frac[i] + init_err[i];
+ }
+
+ for (i = 0; i < gr->tpc_total;) {
+ for (j = 0; j < gr->gpc_nr; j++) {
+ if ((run_err[j] * 2) >= comm_denom) {
+ gr->tile[i++] = gpc_map[j];
+ run_err[j] += init_frac[j] - comm_denom;
+ } else {
+ run_err[j] += init_frac[j];
+ }
+ }
+ }
+}
+
static int
gf100_gr_oneinit(struct nvkm_gr *base)
{
}
}
- /*XXX: these need figuring out... though it might not even matter */
- switch (device->chipset) {
- case 0xc0:
- if (gr->tpc_total == 11) { /* 465, 3/4/4/0, 4 */
- gr->screen_tile_row_offset = 0x07;
- } else
- if (gr->tpc_total == 14) { /* 470, 3/3/4/4, 5 */
- gr->screen_tile_row_offset = 0x05;
- } else
- if (gr->tpc_total == 15) { /* 480, 3/4/4/4, 6 */
- gr->screen_tile_row_offset = 0x06;
- }
- break;
- case 0xc3: /* 450, 4/0/0/0, 2 */
- gr->screen_tile_row_offset = 0x03;
- break;
- case 0xc4: /* 460, 3/4/0/0, 4 */
- gr->screen_tile_row_offset = 0x01;
- break;
- case 0xc1: /* 2/0/0/0, 1 */
- gr->screen_tile_row_offset = 0x01;
- break;
- case 0xc8: /* 4/4/3/4, 5 */
- gr->screen_tile_row_offset = 0x06;
- break;
- case 0xce: /* 4/4/0/0, 4 */
- gr->screen_tile_row_offset = 0x03;
- break;
- case 0xcf: /* 4/0/0/0, 3 */
- gr->screen_tile_row_offset = 0x03;
- break;
- case 0xd7:
- case 0xd9: /* 1/0/0/0, 1 */
- case 0xea: /* gk20a */
- case 0x12b: /* gm20b */
- gr->screen_tile_row_offset = 0x01;
- break;
- }
-
+ memset(gr->tile, 0xff, sizeof(gr->tile));
+ gr->func->oneinit_tiles(gr);
return 0;
}
static const struct gf100_gr_func
gf100_gr = {
+ .oneinit_tiles = gf100_gr_oneinit_tiles,
.init = gf100_gr_init,
.init_gpc_mmu = gf100_gr_init_gpc_mmu,
.init_vsc_stream_master = gf100_gr_init_vsc_stream_master,
u8 ppc_tpc_nr[GPC_MAX][4];
u8 ppc_tpc_min;
+ u8 screen_tile_row_offset;
+ u8 tile[TPC_MAX];
+
struct gf100_gr_data mmio_data[4];
struct gf100_gr_mmio mmio_list[4096/8];
u32 size;
u32 *data;
-
- u8 screen_tile_row_offset;
};
int gf100_gr_ctor(const struct gf100_gr_func *, struct nvkm_device *,
struct gf100_gr_func {
void (*dtor)(struct gf100_gr *);
+ void (*oneinit_tiles)(struct gf100_gr *);
int (*init)(struct gf100_gr *);
void (*init_gpc_mmu)(struct gf100_gr *);
void (*init_r405a14)(struct gf100_gr *);
};
int gf100_gr_rops(struct gf100_gr *);
+void gf100_gr_oneinit_tiles(struct gf100_gr *);
int gf100_gr_init(struct gf100_gr *);
void gf100_gr_init_vsc_stream_master(struct gf100_gr *);
void gf100_gr_init_zcull(struct gf100_gr *);
int gk20a_gr_init(struct gf100_gr *);
+void gm200_gr_oneinit_tiles(struct gf100_gr *);
int gm200_gr_rops(struct gf100_gr *);
void gm200_gr_init_num_active_ltcs(struct gf100_gr *);
void gm200_gr_init_ds_hww_esr_2(struct gf100_gr *);
static const struct gf100_gr_func
gf104_gr = {
+ .oneinit_tiles = gf100_gr_oneinit_tiles,
.init = gf100_gr_init,
.init_gpc_mmu = gf100_gr_init_gpc_mmu,
.init_vsc_stream_master = gf100_gr_init_vsc_stream_master,
static const struct gf100_gr_func
gf108_gr = {
+ .oneinit_tiles = gf100_gr_oneinit_tiles,
.init = gf100_gr_init,
.init_gpc_mmu = gf100_gr_init_gpc_mmu,
.init_r405a14 = gf108_gr_init_r405a14,
static const struct gf100_gr_func
gf110_gr = {
+ .oneinit_tiles = gf100_gr_oneinit_tiles,
.init = gf100_gr_init,
.init_gpc_mmu = gf100_gr_init_gpc_mmu,
.init_vsc_stream_master = gf100_gr_init_vsc_stream_master,
static const struct gf100_gr_func
gf117_gr = {
+ .oneinit_tiles = gf100_gr_oneinit_tiles,
.init = gf100_gr_init,
.init_gpc_mmu = gf100_gr_init_gpc_mmu,
.init_vsc_stream_master = gf100_gr_init_vsc_stream_master,
static const struct gf100_gr_func
gf119_gr = {
+ .oneinit_tiles = gf100_gr_oneinit_tiles,
.init = gf100_gr_init,
.init_gpc_mmu = gf100_gr_init_gpc_mmu,
.init_vsc_stream_master = gf100_gr_init_vsc_stream_master,
static const struct gf100_gr_func
gk104_gr = {
+ .oneinit_tiles = gf100_gr_oneinit_tiles,
.init = gf100_gr_init,
.init_gpc_mmu = gf100_gr_init_gpc_mmu,
.init_vsc_stream_master = gk104_gr_init_vsc_stream_master,
static const struct gf100_gr_func
gk110_gr = {
+ .oneinit_tiles = gf100_gr_oneinit_tiles,
.init = gf100_gr_init,
.init_gpc_mmu = gf100_gr_init_gpc_mmu,
.init_vsc_stream_master = gk104_gr_init_vsc_stream_master,
static const struct gf100_gr_func
gk110b_gr = {
+ .oneinit_tiles = gf100_gr_oneinit_tiles,
.init = gf100_gr_init,
.init_gpc_mmu = gf100_gr_init_gpc_mmu,
.init_vsc_stream_master = gk104_gr_init_vsc_stream_master,
static const struct gf100_gr_func
gk208_gr = {
+ .oneinit_tiles = gf100_gr_oneinit_tiles,
.init = gf100_gr_init,
.init_gpc_mmu = gf100_gr_init_gpc_mmu,
.init_vsc_stream_master = gk104_gr_init_vsc_stream_master,
static const struct gf100_gr_func
gk20a_gr = {
+ .oneinit_tiles = gf100_gr_oneinit_tiles,
.init = gk20a_gr_init,
.init_zcull = gf117_gr_init_zcull,
.init_rop_active_fbps = gk104_gr_init_rop_active_fbps,
static const struct gf100_gr_func
gm107_gr = {
+ .oneinit_tiles = gf100_gr_oneinit_tiles,
.init = gf100_gr_init,
.init_gpc_mmu = gm107_gr_init_gpc_mmu,
.init_bios = gm107_gr_init_bios,
nvkm_mask(device, 0x408958, 0x0000000f, fbp_count); /* crop */
}
+static u8
+gm200_gr_tile_map_6_24[] = {
+ 0, 1, 2, 3, 4, 5, 3, 4, 5, 0, 1, 2, 0, 1, 2, 3, 4, 5, 3, 4, 5, 0, 1, 2,
+};
+
+static u8
+gm200_gr_tile_map_4_16[] = {
+ 0, 1, 2, 3, 2, 3, 0, 1, 3, 0, 1, 2, 1, 2, 3, 0,
+};
+
+static u8
+gm200_gr_tile_map_2_8[] = {
+ 0, 1, 1, 0, 0, 1, 1, 0,
+};
+
+void
+gm200_gr_oneinit_tiles(struct gf100_gr *gr)
+{
+ /*XXX: Not sure what this is about. The algorithm from NVGPU
+ * seems to work for all boards I tried from earlier (and
+ * later) GPUs except in these specific configurations.
+ *
+ * Let's just hardcode them for now.
+ */
+ if (gr->gpc_nr == 2 && gr->tpc_total == 8) {
+ memcpy(gr->tile, gm200_gr_tile_map_2_8, gr->tpc_total);
+ gr->screen_tile_row_offset = 1;
+ } else
+ if (gr->gpc_nr == 4 && gr->tpc_total == 16) {
+ memcpy(gr->tile, gm200_gr_tile_map_4_16, gr->tpc_total);
+ gr->screen_tile_row_offset = 4;
+ } else
+ if (gr->gpc_nr == 6 && gr->tpc_total == 24) {
+ memcpy(gr->tile, gm200_gr_tile_map_6_24, gr->tpc_total);
+ gr->screen_tile_row_offset = 5;
+ } else {
+ gf100_gr_oneinit_tiles(gr);
+ }
+}
+
int
gm200_gr_new_(const struct gf100_gr_func *func, struct nvkm_device *device,
int index, struct nvkm_gr **pgr)
static const struct gf100_gr_func
gm200_gr = {
+ .oneinit_tiles = gm200_gr_oneinit_tiles,
.init = gf100_gr_init,
.init_gpc_mmu = gm200_gr_init_gpc_mmu,
.init_bios = gm107_gr_init_bios,
static const struct gf100_gr_func
gm20b_gr = {
+ .oneinit_tiles = gm200_gr_oneinit_tiles,
.init = gk20a_gr_init,
.init_zcull = gf117_gr_init_zcull,
.init_gpc_mmu = gm20b_gr_init_gpc_mmu,
static const struct gf100_gr_func
gp100_gr = {
+ .oneinit_tiles = gm200_gr_oneinit_tiles,
.init = gf100_gr_init,
.init_gpc_mmu = gm200_gr_init_gpc_mmu,
.init_vsc_stream_master = gk104_gr_init_vsc_stream_master,
static const struct gf100_gr_func
gp102_gr = {
+ .oneinit_tiles = gm200_gr_oneinit_tiles,
.init = gf100_gr_init,
.init_gpc_mmu = gm200_gr_init_gpc_mmu,
.init_vsc_stream_master = gk104_gr_init_vsc_stream_master,
static const struct gf100_gr_func
gp104_gr = {
+ .oneinit_tiles = gm200_gr_oneinit_tiles,
.init = gf100_gr_init,
.init_gpc_mmu = gm200_gr_init_gpc_mmu,
.init_vsc_stream_master = gk104_gr_init_vsc_stream_master,
static const struct gf100_gr_func
gp107_gr = {
+ .oneinit_tiles = gm200_gr_oneinit_tiles,
.init = gf100_gr_init,
.init_gpc_mmu = gm200_gr_init_gpc_mmu,
.init_vsc_stream_master = gk104_gr_init_vsc_stream_master,
static const struct gf100_gr_func
gp10b_gr = {
+ .oneinit_tiles = gm200_gr_oneinit_tiles,
.init = gf100_gr_init,
.init_gpc_mmu = gm200_gr_init_gpc_mmu,
.init_vsc_stream_master = gk104_gr_init_vsc_stream_master,