No code changes, proven by envyas producing identical binaries.
Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
--- /dev/null
+/* fuc microcode util functions for nvc0 PGRAPH
+ *
+ * Copyright 2011 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Ben Skeggs
+ */
+
+#ifdef INCLUDE_CODE
+// queue_put - add request to queue
+//
+// In : $r13 queue pointer
+// $r14 command
+// $r15 data
+//
+queue_put:
+ // make sure we have space..
+ ld b32 $r8 D[$r13 + 0x0] // GET
+ ld b32 $r9 D[$r13 + 0x4] // PUT
+ xor $r8 8
+ cmpu b32 $r8 $r9
+ bra ne #queue_put_next
+ mov $r15 E_CMD_OVERFLOW
+ call #error
+ ret
+
+ // store cmd/data on queue
+ queue_put_next:
+ and $r8 $r9 7
+ shl b32 $r8 3
+ add b32 $r8 $r13
+ add b32 $r8 8
+ st b32 D[$r8 + 0x0] $r14
+ st b32 D[$r8 + 0x4] $r15
+
+ // update PUT
+ add b32 $r9 1
+ and $r9 0xf
+ st b32 D[$r13 + 0x4] $r9
+ ret
+
+// queue_get - fetch request from queue
+//
+// In : $r13 queue pointer
+//
+// Out: $p1 clear on success (data available)
+// $r14 command
+// $r15 data
+//
+queue_get:
+ bset $flags $p1
+ ld b32 $r8 D[$r13 + 0x0] // GET
+ ld b32 $r9 D[$r13 + 0x4] // PUT
+ cmpu b32 $r8 $r9
+ bra e #queue_get_done
+ // fetch first cmd/data pair
+ and $r9 $r8 7
+ shl b32 $r9 3
+ add b32 $r9 $r13
+ add b32 $r9 8
+ ld b32 $r14 D[$r9 + 0x0]
+ ld b32 $r15 D[$r9 + 0x4]
+
+ // update GET
+ add b32 $r8 1
+ and $r8 0xf
+ st b32 D[$r13 + 0x0] $r8
+ bclr $flags $p1
+queue_get_done:
+ ret
+
+// nv_rd32 - read 32-bit value from nv register
+//
+// In : $r14 register
+// Out: $r15 value
+//
+nv_rd32:
+ mov $r11 0x728
+ shl b32 $r11 6
+ mov b32 $r12 $r14
+ bset $r12 31 // MMIO_CTRL_PENDING
+ iowr I[$r11 + 0x000] $r12 // MMIO_CTRL
+ nv_rd32_wait:
+ iord $r12 I[$r11 + 0x000]
+ xbit $r12 $r12 31
+ bra ne #nv_rd32_wait
+ mov $r10 6 // DONE_MMIO_RD
+ call #wait_doneo
+ iord $r15 I[$r11 + 0x100] // MMIO_RDVAL
+ ret
+
+// nv_wr32 - write 32-bit value to nv register
+//
+// In : $r14 register
+// $r15 value
+//
+nv_wr32:
+ mov $r11 0x728
+ shl b32 $r11 6
+ iowr I[$r11 + 0x200] $r15 // MMIO_WRVAL
+ mov b32 $r12 $r14
+ bset $r12 31 // MMIO_CTRL_PENDING
+ bset $r12 30 // MMIO_CTRL_WRITE
+ iowr I[$r11 + 0x000] $r12 // MMIO_CTRL
+ nv_wr32_wait:
+ iord $r12 I[$r11 + 0x000]
+ xbit $r12 $r12 31
+ bra ne #nv_wr32_wait
+ ret
+
+// (re)set watchdog timer
+//
+// In : $r15 timeout
+//
+watchdog_reset:
+ mov $r8 0x430
+ shl b32 $r8 6
+ bset $r15 31
+ iowr I[$r8 + 0x000] $r15
+ ret
+
+// clear watchdog timer
+watchdog_clear:
+ mov $r8 0x430
+ shl b32 $r8 6
+ iowr I[$r8 + 0x000] $r0
+ ret
+
+// wait_donez - wait on FUC_DONE bit to become clear
+//
+// In : $r10 bit to wait on
+//
+wait_donez:
+ trace_set(T_WAIT);
+ mov $r8 0x818
+ shl b32 $r8 6
+ iowr I[$r8 + 0x000] $r10
+ wait_donez_ne:
+ mov $r8 0x400
+ shl b32 $r8 6
+ iord $r8 I[$r8 + 0x000]
+ xbit $r8 $r8 $r10
+ bra ne #wait_donez_ne
+ trace_clr(T_WAIT)
+ ret
+
+// wait_doneo - wait on FUC_DONE bit to become set
+//
+// In : $r10 bit to wait on
+//
+wait_doneo:
+ trace_set(T_WAIT);
+ mov $r8 0x818
+ shl b32 $r8 6
+ iowr I[$r8 + 0x000] $r10
+ wait_doneo_e:
+ mov $r8 0x400
+ shl b32 $r8 6
+ iord $r8 I[$r8 + 0x000]
+ xbit $r8 $r8 $r10
+ bra e #wait_doneo_e
+ trace_clr(T_WAIT)
+ ret
+
+// mmctx_size - determine size of a mmio list transfer
+//
+// In : $r14 mmio list head
+// $r15 mmio list tail
+// Out: $r15 transfer size (in bytes)
+//
+mmctx_size:
+ clear b32 $r9
+ nv_mmctx_size_loop:
+ ld b32 $r8 D[$r14]
+ shr b32 $r8 26
+ add b32 $r8 1
+ shl b32 $r8 2
+ add b32 $r9 $r8
+ add b32 $r14 4
+ cmpu b32 $r14 $r15
+ bra ne #nv_mmctx_size_loop
+ mov b32 $r15 $r9
+ ret
+
+// mmctx_xfer - execute a list of mmio transfers
+//
+// In : $r10 flags
+// bit 0: direction (0 = save, 1 = load)
+// bit 1: set if first transfer
+// bit 2: set if last transfer
+// $r11 base
+// $r12 mmio list head
+// $r13 mmio list tail
+// $r14 multi_stride
+// $r15 multi_mask
+//
+mmctx_xfer:
+ trace_set(T_MMCTX)
+ mov $r8 0x710
+ shl b32 $r8 6
+ clear b32 $r9
+ or $r11 $r11
+ bra e #mmctx_base_disabled
+ iowr I[$r8 + 0x000] $r11 // MMCTX_BASE
+ bset $r9 0 // BASE_EN
+ mmctx_base_disabled:
+ or $r14 $r14
+ bra e #mmctx_multi_disabled
+ iowr I[$r8 + 0x200] $r14 // MMCTX_MULTI_STRIDE
+ iowr I[$r8 + 0x300] $r15 // MMCTX_MULTI_MASK
+ bset $r9 1 // MULTI_EN
+ mmctx_multi_disabled:
+ add b32 $r8 0x100
+
+ xbit $r11 $r10 0
+ shl b32 $r11 16 // DIR
+ bset $r11 12 // QLIMIT = 0x10
+ xbit $r14 $r10 1
+ shl b32 $r14 17
+ or $r11 $r14 // START_TRIGGER
+ iowr I[$r8 + 0x000] $r11 // MMCTX_CTRL
+
+ // loop over the mmio list, and send requests to the hw
+ mmctx_exec_loop:
+ // wait for space in mmctx queue
+ mmctx_wait_free:
+ iord $r14 I[$r8 + 0x000] // MMCTX_CTRL
+ and $r14 0x1f
+ bra e #mmctx_wait_free
+
+ // queue up an entry
+ ld b32 $r14 D[$r12]
+ or $r14 $r9
+ iowr I[$r8 + 0x300] $r14
+ add b32 $r12 4
+ cmpu b32 $r12 $r13
+ bra ne #mmctx_exec_loop
+
+ xbit $r11 $r10 2
+ bra ne #mmctx_stop
+ // wait for queue to empty
+ mmctx_fini_wait:
+ iord $r11 I[$r8 + 0x000] // MMCTX_CTRL
+ and $r11 0x1f
+ cmpu b32 $r11 0x10
+ bra ne #mmctx_fini_wait
+ mov $r10 2 // DONE_MMCTX
+ call #wait_donez
+ bra #mmctx_done
+ mmctx_stop:
+ xbit $r11 $r10 0
+ shl b32 $r11 16 // DIR
+ bset $r11 12 // QLIMIT = 0x10
+ bset $r11 18 // STOP_TRIGGER
+ iowr I[$r8 + 0x000] $r11 // MMCTX_CTRL
+ mmctx_stop_wait:
+ // wait for STOP_TRIGGER to clear
+ iord $r11 I[$r8 + 0x000] // MMCTX_CTRL
+ xbit $r11 $r11 18
+ bra ne #mmctx_stop_wait
+ mmctx_done:
+ trace_clr(T_MMCTX)
+ ret
+
+// Wait for DONE_STRAND
+//
+strand_wait:
+ push $r10
+ mov $r10 2
+ call #wait_donez
+ pop $r10
+ ret
+
+// unknown - call before issuing strand commands
+//
+strand_pre:
+ mov $r8 0x4afc
+ sethi $r8 0x20000
+ mov $r9 0xc
+ iowr I[$r8] $r9
+ call #strand_wait
+ ret
+
+// unknown - call after issuing strand commands
+//
+strand_post:
+ mov $r8 0x4afc
+ sethi $r8 0x20000
+ mov $r9 0xd
+ iowr I[$r8] $r9
+ call #strand_wait
+ ret
+
+// Selects strand set?!
+//
+// In: $r14 id
+//
+strand_set:
+ mov $r10 0x4ffc
+ sethi $r10 0x20000
+ sub b32 $r11 $r10 0x500
+ mov $r12 0xf
+ iowr I[$r10 + 0x000] $r12 // 0x93c = 0xf
+ mov $r12 0xb
+ iowr I[$r11 + 0x000] $r12 // 0x928 = 0xb
+ call #strand_wait
+ iowr I[$r10 + 0x000] $r14 // 0x93c = <id>
+ mov $r12 0xa
+ iowr I[$r11 + 0x000] $r12 // 0x928 = 0xa
+ call #strand_wait
+ ret
+
+// Initialise strand context data
+//
+// In : $r15 context base
+// Out: $r15 context size (in bytes)
+//
+// Strandset(?) 3 hardcoded currently
+//
+strand_ctx_init:
+ trace_set(T_STRINIT)
+ call #strand_pre
+ mov $r14 3
+ call #strand_set
+ mov $r10 0x46fc
+ sethi $r10 0x20000
+ add b32 $r11 $r10 0x400
+ iowr I[$r10 + 0x100] $r0 // STRAND_FIRST_GENE = 0
+ mov $r12 1
+ iowr I[$r11 + 0x000] $r12 // STRAND_CMD = LATCH_FIRST_GENE
+ call #strand_wait
+ sub b32 $r12 $r0 1
+ iowr I[$r10 + 0x000] $r12 // STRAND_GENE_CNT = 0xffffffff
+ mov $r12 2
+ iowr I[$r11 + 0x000] $r12 // STRAND_CMD = LATCH_GENE_CNT
+ call #strand_wait
+ call #strand_post
+
+ // read the size of each strand, poke the context offset of
+ // each into STRAND_{SAVE,LOAD}_SWBASE now, no need to worry
+ // about it later then.
+ mov $r8 0x880
+ shl b32 $r8 6
+ iord $r9 I[$r8 + 0x000] // STRANDS
+ add b32 $r8 0x2200
+ shr b32 $r14 $r15 8
+ ctx_init_strand_loop:
+ iowr I[$r8 + 0x000] $r14 // STRAND_SAVE_SWBASE
+ iowr I[$r8 + 0x100] $r14 // STRAND_LOAD_SWBASE
+ iord $r10 I[$r8 + 0x200] // STRAND_SIZE
+ shr b32 $r10 6
+ add b32 $r10 1
+ add b32 $r14 $r10
+ add b32 $r8 4
+ sub b32 $r9 1
+ bra ne #ctx_init_strand_loop
+
+ shl b32 $r14 8
+ sub b32 $r15 $r14 $r15
+ trace_clr(T_STRINIT)
+ ret
+#endif
--- /dev/null
+/* fuc microcode for nvc0 PGRAPH/GPC
+ *
+ * Copyright 2011 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Ben Skeggs
+ */
+
+/* TODO
+ * - bracket certain functions with scratch writes, useful for debugging
+ * - watchdog timer around ctx operations
+ */
+
+#ifdef INCLUDE_DATA
+gpc_id: .b32 0
+gpc_mmio_list_head: .b32 0
+gpc_mmio_list_tail: .b32 0
+
+tpc_count: .b32 0
+tpc_mask: .b32 0
+tpc_mmio_list_head: .b32 0
+tpc_mmio_list_tail: .b32 0
+
+cmd_queue: queue_init
+#endif
+
+#ifdef INCLUDE_CODE
+// reports an exception to the host
+//
+// In: $r15 error code (see nvc0.fuc)
+//
+error:
+ push $r14
+ mov $r14 -0x67ec // 0x9814
+ sethi $r14 0x400000
+ call #nv_wr32 // HUB_CTXCTL_CC_SCRATCH[5] = error code
+ add b32 $r14 0x41c
+ mov $r15 1
+ call #nv_wr32 // HUB_CTXCTL_INTR_UP_SET
+ pop $r14
+ ret
+
+// GPC fuc initialisation, executed by triggering ucode start, will
+// fall through to main loop after completion.
+//
+// Input:
+// CC_SCRATCH[0]: chipset (PMC_BOOT_0 read returns 0x0bad0bad... sigh)
+// CC_SCRATCH[1]: context base
+//
+// Output:
+// CC_SCRATCH[0]:
+// 31:31: set to signal completion
+// CC_SCRATCH[1]:
+// 31:0: GPC context size
+//
+init:
+ clear b32 $r0
+ mov $sp $r0
+
+ // enable fifo access
+ mov $r1 0x1200
+ mov $r2 2
+ iowr I[$r1 + 0x000] $r2 // FIFO_ENABLE
+
+ // setup i0 handler, and route all interrupts to it
+ mov $r1 #ih
+ mov $iv0 $r1
+ mov $r1 0x400
+ iowr I[$r1 + 0x300] $r0 // INTR_DISPATCH
+
+ // enable fifo interrupt
+ mov $r2 4
+ iowr I[$r1 + 0x000] $r2 // INTR_EN_SET
+
+ // enable interrupts
+ bset $flags ie0
+
+ // figure out which GPC we are, and how many TPCs we have
+ mov $r1 0x608
+ shl b32 $r1 6
+ iord $r2 I[$r1 + 0x000] // UNITS
+ mov $r3 1
+ and $r2 0x1f
+ shl b32 $r3 $r2
+ sub b32 $r3 1
+ st b32 D[$r0 + #tpc_count] $r2
+ st b32 D[$r0 + #tpc_mask] $r3
+ add b32 $r1 0x400
+ iord $r2 I[$r1 + 0x000] // MYINDEX
+ st b32 D[$r0 + #gpc_id] $r2
+
+ // find context data for this chipset
+ mov $r2 0x800
+ shl b32 $r2 6
+ iord $r2 I[$r2 + 0x000] // CC_SCRATCH[0]
+ mov $r1 #chipsets - 12
+ init_find_chipset:
+ add b32 $r1 12
+ ld b32 $r3 D[$r1 + 0x00]
+ cmpu b32 $r3 $r2
+ bra e #init_context
+ cmpu b32 $r3 0
+ bra ne #init_find_chipset
+ // unknown chipset
+ ret
+
+ // initialise context base, and size tracking
+ init_context:
+ mov $r2 0x800
+ shl b32 $r2 6
+ iord $r2 I[$r2 + 0x100] // CC_SCRATCH[1], initial base
+ clear b32 $r3 // track GPC context size here
+
+ // set mmctx base addresses now so we don't have to do it later,
+ // they don't currently ever change
+ mov $r4 0x700
+ shl b32 $r4 6
+ shr b32 $r5 $r2 8
+ iowr I[$r4 + 0x000] $r5 // MMCTX_SAVE_SWBASE
+ iowr I[$r4 + 0x100] $r5 // MMCTX_LOAD_SWBASE
+
+ // calculate GPC mmio context size, store the chipset-specific
+ // mmio list pointers somewhere we can get at them later without
+ // re-parsing the chipset list
+ clear b32 $r14
+ clear b32 $r15
+ ld b16 $r14 D[$r1 + 4]
+ ld b16 $r15 D[$r1 + 6]
+ st b16 D[$r0 + #gpc_mmio_list_head] $r14
+ st b16 D[$r0 + #gpc_mmio_list_tail] $r15
+ call #mmctx_size
+ add b32 $r2 $r15
+ add b32 $r3 $r15
+
+ // calculate per-TPC mmio context size, store the list pointers
+ ld b16 $r14 D[$r1 + 8]
+ ld b16 $r15 D[$r1 + 10]
+ st b16 D[$r0 + #tpc_mmio_list_head] $r14
+ st b16 D[$r0 + #tpc_mmio_list_tail] $r15
+ call #mmctx_size
+ ld b32 $r14 D[$r0 + #tpc_count]
+ mulu $r14 $r15
+ add b32 $r2 $r14
+ add b32 $r3 $r14
+
+ // round up base/size to 256 byte boundary (for strand SWBASE)
+ add b32 $r4 0x1300
+ shr b32 $r3 2
+ iowr I[$r4 + 0x000] $r3 // MMCTX_LOAD_COUNT, wtf for?!?
+ shr b32 $r2 8
+ shr b32 $r3 6
+ add b32 $r2 1
+ add b32 $r3 1
+ shl b32 $r2 8
+ shl b32 $r3 8
+
+ // calculate size of strand context data
+ mov b32 $r15 $r2
+ call #strand_ctx_init
+ add b32 $r3 $r15
+
+ // save context size, and tell HUB we're done
+ mov $r1 0x800
+ shl b32 $r1 6
+ iowr I[$r1 + 0x100] $r3 // CC_SCRATCH[1] = context size
+ add b32 $r1 0x800
+ clear b32 $r2
+ bset $r2 31
+ iowr I[$r1 + 0x000] $r2 // CC_SCRATCH[0] |= 0x80000000
+
+// Main program loop, very simple, sleeps until woken up by the interrupt
+// handler, pulls a command from the queue and executes its handler
+//
+main:
+ bset $flags $p0
+ sleep $p0
+ mov $r13 #cmd_queue
+ call #queue_get
+ bra $p1 #main
+
+ // 0x0000-0x0003 are all context transfers
+ cmpu b32 $r14 0x04
+ bra nc #main_not_ctx_xfer
+ // fetch $flags and mask off $p1/$p2
+ mov $r1 $flags
+ mov $r2 0x0006
+ not b32 $r2
+ and $r1 $r2
+ // set $p1/$p2 according to transfer type
+ shl b32 $r14 1
+ or $r1 $r14
+ mov $flags $r1
+ // transfer context data
+ call #ctx_xfer
+ bra #main
+
+ main_not_ctx_xfer:
+ shl b32 $r15 $r14 16
+ or $r15 E_BAD_COMMAND
+ call #error
+ bra #main
+
+// interrupt handler
+ih:
+ push $r8
+ mov $r8 $flags
+ push $r8
+ push $r9
+ push $r10
+ push $r11
+ push $r13
+ push $r14
+ push $r15
+
+ // incoming fifo command?
+ iord $r10 I[$r0 + 0x200] // INTR
+ and $r11 $r10 0x00000004
+ bra e #ih_no_fifo
+ // queue incoming fifo command for later processing
+ mov $r11 0x1900
+ mov $r13 #cmd_queue
+ iord $r14 I[$r11 + 0x100] // FIFO_CMD
+ iord $r15 I[$r11 + 0x000] // FIFO_DATA
+ call #queue_put
+ add b32 $r11 0x400
+ mov $r14 1
+ iowr I[$r11 + 0x000] $r14 // FIFO_ACK
+
+ // ack, and wake up main()
+ ih_no_fifo:
+ iowr I[$r0 + 0x100] $r10 // INTR_ACK
+
+ pop $r15
+ pop $r14
+ pop $r13
+ pop $r11
+ pop $r10
+ pop $r9
+ pop $r8
+ mov $flags $r8
+ pop $r8
+ bclr $flags $p0
+ iret
+
+// Set this GPC's bit in HUB_BAR, used to signal completion of various
+// activities to the HUB fuc
+//
+hub_barrier_done:
+ mov $r15 1
+ ld b32 $r14 D[$r0 + #gpc_id]
+ shl b32 $r15 $r14
+ mov $r14 -0x6be8 // 0x409418 - HUB_BAR_SET
+ sethi $r14 0x400000
+ call #nv_wr32
+ ret
+
+// Disables various things, waits a bit, and re-enables them..
+//
+// Not sure how exactly this helps, perhaps "ENABLE" is not such a
+// good description for the bits we turn off? Anyways, without this,
+// funny things happen.
+//
+ctx_redswitch:
+ mov $r14 0x614
+ shl b32 $r14 6
+ mov $r15 0x020
+ iowr I[$r14] $r15 // GPC_RED_SWITCH = POWER
+ mov $r15 8
+ ctx_redswitch_delay:
+ sub b32 $r15 1
+ bra ne #ctx_redswitch_delay
+ mov $r15 0xa20
+ iowr I[$r14] $r15 // GPC_RED_SWITCH = UNK11, ENABLE, POWER
+ ret
+
+// Transfer GPC context data between GPU and storage area
+//
+// In: $r15 context base address
+// $p1 clear on save, set on load
+// $p2 set if opposite direction done/will be done, so:
+// on save it means: "a load will follow this save"
+// on load it means: "a save preceeded this load"
+//
+ctx_xfer:
+ // set context base address
+ mov $r1 0xa04
+ shl b32 $r1 6
+ iowr I[$r1 + 0x000] $r15// MEM_BASE
+ bra not $p1 #ctx_xfer_not_load
+ call #ctx_redswitch
+ ctx_xfer_not_load:
+
+ // strands
+ mov $r1 0x4afc
+ sethi $r1 0x20000
+ mov $r2 0xc
+ iowr I[$r1] $r2 // STRAND_CMD(0x3f) = 0x0c
+ call #strand_wait
+ mov $r2 0x47fc
+ sethi $r2 0x20000
+ iowr I[$r2] $r0 // STRAND_FIRST_GENE(0x3f) = 0x00
+ xbit $r2 $flags $p1
+ add b32 $r2 3
+ iowr I[$r1] $r2 // STRAND_CMD(0x3f) = 0x03/0x04 (SAVE/LOAD)
+
+ // mmio context
+ xbit $r10 $flags $p1 // direction
+ or $r10 2 // first
+ mov $r11 0x0000
+ sethi $r11 0x500000
+ ld b32 $r12 D[$r0 + #gpc_id]
+ shl b32 $r12 15
+ add b32 $r11 $r12 // base = NV_PGRAPH_GPCn
+ ld b32 $r12 D[$r0 + #gpc_mmio_list_head]
+ ld b32 $r13 D[$r0 + #gpc_mmio_list_tail]
+ mov $r14 0 // not multi
+ call #mmctx_xfer
+
+ // per-TPC mmio context
+ xbit $r10 $flags $p1 // direction
+ or $r10 4 // last
+ mov $r11 0x4000
+ sethi $r11 0x500000 // base = NV_PGRAPH_GPC0_TPC0
+ ld b32 $r12 D[$r0 + #gpc_id]
+ shl b32 $r12 15
+ add b32 $r11 $r12 // base = NV_PGRAPH_GPCn_TPC0
+ ld b32 $r12 D[$r0 + #tpc_mmio_list_head]
+ ld b32 $r13 D[$r0 + #tpc_mmio_list_tail]
+ ld b32 $r15 D[$r0 + #tpc_mask]
+ mov $r14 0x800 // stride = 0x800
+ call #mmctx_xfer
+
+ // wait for strands to finish
+ call #strand_wait
+
+ // if load, or a save without a load following, do some
+ // unknown stuff that's done after finishing a block of
+ // strand commands
+ bra $p1 #ctx_xfer_post
+ bra not $p2 #ctx_xfer_done
+ ctx_xfer_post:
+ mov $r1 0x4afc
+ sethi $r1 0x20000
+ mov $r2 0xd
+ iowr I[$r1] $r2 // STRAND_CMD(0x3f) = 0x0d
+ call #strand_wait
+
+ // mark completion in HUB's barrier
+ ctx_xfer_done:
+ call #hub_barrier_done
+ ret
+#endif
-/* fuc microcode for nvc0 PGRAPH/GPC
- *
- * Copyright 2011 Red Hat Inc.
+/*
+ * Copyright 2013 Red Hat Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
- * Authors: Ben Skeggs
+ * Authors: Ben Skeggs <bskeggs@redhat.com>
*/
-/* To build:
- * m4 gpcnvc0.fuc | envyas -a -w -m fuc -V fuc3 -o gpcnvc0.fuc.h
- */
-
-/* TODO
- * - bracket certain functions with scratch writes, useful for debugging
- * - watchdog timer around ctx operations
- */
+#define NVGF
+#include "macros.fuc"
.section #nvc0_grgpc_data
-include(`nvc0.fuc')
-gpc_id: .b32 0
-gpc_mmio_list_head: .b32 0
-gpc_mmio_list_tail: .b32 0
-
-tpc_count: .b32 0
-tpc_mask: .b32 0
-tpc_mmio_list_head: .b32 0
-tpc_mmio_list_tail: .b32 0
+#define INCLUDE_DATA
+#include "com.fuc"
+#include "gpc.fuc"
-cmd_queue: queue_init
-
-// chipset descriptions
chipsets:
.b8 0xc0 0 0 0
.b16 #nvc0_gpc_mmio_head
mmctx_data(0x000424, 2);
mmctx_data(0x0006e0, 1);
nvd9_tpc_mmio_tail:
+#undef INCLUDE_DATA
.section #nvc0_grgpc_code
+#define INCLUDE_CODE
bra #init
-define(`include_code')
-include(`nvc0.fuc')
-
-// reports an exception to the host
-//
-// In: $r15 error code (see nvc0.fuc)
-//
-error:
- push $r14
- mov $r14 -0x67ec // 0x9814
- sethi $r14 0x400000
- call #nv_wr32 // HUB_CTXCTL_CC_SCRATCH[5] = error code
- add b32 $r14 0x41c
- mov $r15 1
- call #nv_wr32 // HUB_CTXCTL_INTR_UP_SET
- pop $r14
- ret
-
-// GPC fuc initialisation, executed by triggering ucode start, will
-// fall through to main loop after completion.
-//
-// Input:
-// CC_SCRATCH[0]: chipset (PMC_BOOT_0 read returns 0x0bad0bad... sigh)
-// CC_SCRATCH[1]: context base
-//
-// Output:
-// CC_SCRATCH[0]:
-// 31:31: set to signal completion
-// CC_SCRATCH[1]:
-// 31:0: GPC context size
-//
-init:
- clear b32 $r0
- mov $sp $r0
-
- // enable fifo access
- mov $r1 0x1200
- mov $r2 2
- iowr I[$r1 + 0x000] $r2 // FIFO_ENABLE
-
- // setup i0 handler, and route all interrupts to it
- mov $r1 #ih
- mov $iv0 $r1
- mov $r1 0x400
- iowr I[$r1 + 0x300] $r0 // INTR_DISPATCH
-
- // enable fifo interrupt
- mov $r2 4
- iowr I[$r1 + 0x000] $r2 // INTR_EN_SET
-
- // enable interrupts
- bset $flags ie0
-
- // figure out which GPC we are, and how many TPCs we have
- mov $r1 0x608
- shl b32 $r1 6
- iord $r2 I[$r1 + 0x000] // UNITS
- mov $r3 1
- and $r2 0x1f
- shl b32 $r3 $r2
- sub b32 $r3 1
- st b32 D[$r0 + #tpc_count] $r2
- st b32 D[$r0 + #tpc_mask] $r3
- add b32 $r1 0x400
- iord $r2 I[$r1 + 0x000] // MYINDEX
- st b32 D[$r0 + #gpc_id] $r2
-
- // find context data for this chipset
- mov $r2 0x800
- shl b32 $r2 6
- iord $r2 I[$r2 + 0x000] // CC_SCRATCH[0]
- mov $r1 #chipsets - 12
- init_find_chipset:
- add b32 $r1 12
- ld b32 $r3 D[$r1 + 0x00]
- cmpu b32 $r3 $r2
- bra e #init_context
- cmpu b32 $r3 0
- bra ne #init_find_chipset
- // unknown chipset
- ret
-
- // initialise context base, and size tracking
- init_context:
- mov $r2 0x800
- shl b32 $r2 6
- iord $r2 I[$r2 + 0x100] // CC_SCRATCH[1], initial base
- clear b32 $r3 // track GPC context size here
-
- // set mmctx base addresses now so we don't have to do it later,
- // they don't currently ever change
- mov $r4 0x700
- shl b32 $r4 6
- shr b32 $r5 $r2 8
- iowr I[$r4 + 0x000] $r5 // MMCTX_SAVE_SWBASE
- iowr I[$r4 + 0x100] $r5 // MMCTX_LOAD_SWBASE
-
- // calculate GPC mmio context size, store the chipset-specific
- // mmio list pointers somewhere we can get at them later without
- // re-parsing the chipset list
- clear b32 $r14
- clear b32 $r15
- ld b16 $r14 D[$r1 + 4]
- ld b16 $r15 D[$r1 + 6]
- st b16 D[$r0 + #gpc_mmio_list_head] $r14
- st b16 D[$r0 + #gpc_mmio_list_tail] $r15
- call #mmctx_size
- add b32 $r2 $r15
- add b32 $r3 $r15
-
- // calculate per-TPC mmio context size, store the list pointers
- ld b16 $r14 D[$r1 + 8]
- ld b16 $r15 D[$r1 + 10]
- st b16 D[$r0 + #tpc_mmio_list_head] $r14
- st b16 D[$r0 + #tpc_mmio_list_tail] $r15
- call #mmctx_size
- ld b32 $r14 D[$r0 + #tpc_count]
- mulu $r14 $r15
- add b32 $r2 $r14
- add b32 $r3 $r14
-
- // round up base/size to 256 byte boundary (for strand SWBASE)
- add b32 $r4 0x1300
- shr b32 $r3 2
- iowr I[$r4 + 0x000] $r3 // MMCTX_LOAD_COUNT, wtf for?!?
- shr b32 $r2 8
- shr b32 $r3 6
- add b32 $r2 1
- add b32 $r3 1
- shl b32 $r2 8
- shl b32 $r3 8
-
- // calculate size of strand context data
- mov b32 $r15 $r2
- call #strand_ctx_init
- add b32 $r3 $r15
-
- // save context size, and tell HUB we're done
- mov $r1 0x800
- shl b32 $r1 6
- iowr I[$r1 + 0x100] $r3 // CC_SCRATCH[1] = context size
- add b32 $r1 0x800
- clear b32 $r2
- bset $r2 31
- iowr I[$r1 + 0x000] $r2 // CC_SCRATCH[0] |= 0x80000000
-
-// Main program loop, very simple, sleeps until woken up by the interrupt
-// handler, pulls a command from the queue and executes its handler
-//
-main:
- bset $flags $p0
- sleep $p0
- mov $r13 #cmd_queue
- call #queue_get
- bra $p1 #main
-
- // 0x0000-0x0003 are all context transfers
- cmpu b32 $r14 0x04
- bra nc #main_not_ctx_xfer
- // fetch $flags and mask off $p1/$p2
- mov $r1 $flags
- mov $r2 0x0006
- not b32 $r2
- and $r1 $r2
- // set $p1/$p2 according to transfer type
- shl b32 $r14 1
- or $r1 $r14
- mov $flags $r1
- // transfer context data
- call #ctx_xfer
- bra #main
-
- main_not_ctx_xfer:
- shl b32 $r15 $r14 16
- or $r15 E_BAD_COMMAND
- call #error
- bra #main
-
-// interrupt handler
-ih:
- push $r8
- mov $r8 $flags
- push $r8
- push $r9
- push $r10
- push $r11
- push $r13
- push $r14
- push $r15
-
- // incoming fifo command?
- iord $r10 I[$r0 + 0x200] // INTR
- and $r11 $r10 0x00000004
- bra e #ih_no_fifo
- // queue incoming fifo command for later processing
- mov $r11 0x1900
- mov $r13 #cmd_queue
- iord $r14 I[$r11 + 0x100] // FIFO_CMD
- iord $r15 I[$r11 + 0x000] // FIFO_DATA
- call #queue_put
- add b32 $r11 0x400
- mov $r14 1
- iowr I[$r11 + 0x000] $r14 // FIFO_ACK
-
- // ack, and wake up main()
- ih_no_fifo:
- iowr I[$r0 + 0x100] $r10 // INTR_ACK
-
- pop $r15
- pop $r14
- pop $r13
- pop $r11
- pop $r10
- pop $r9
- pop $r8
- mov $flags $r8
- pop $r8
- bclr $flags $p0
- iret
-
-// Set this GPC's bit in HUB_BAR, used to signal completion of various
-// activities to the HUB fuc
-//
-hub_barrier_done:
- mov $r15 1
- ld b32 $r14 D[$r0 + #gpc_id]
- shl b32 $r15 $r14
- mov $r14 -0x6be8 // 0x409418 - HUB_BAR_SET
- sethi $r14 0x400000
- call #nv_wr32
- ret
-
-// Disables various things, waits a bit, and re-enables them..
-//
-// Not sure how exactly this helps, perhaps "ENABLE" is not such a
-// good description for the bits we turn off? Anyways, without this,
-// funny things happen.
-//
-ctx_redswitch:
- mov $r14 0x614
- shl b32 $r14 6
- mov $r15 0x020
- iowr I[$r14] $r15 // GPC_RED_SWITCH = POWER
- mov $r15 8
- ctx_redswitch_delay:
- sub b32 $r15 1
- bra ne #ctx_redswitch_delay
- mov $r15 0xa20
- iowr I[$r14] $r15 // GPC_RED_SWITCH = UNK11, ENABLE, POWER
- ret
-
-// Transfer GPC context data between GPU and storage area
-//
-// In: $r15 context base address
-// $p1 clear on save, set on load
-// $p2 set if opposite direction done/will be done, so:
-// on save it means: "a load will follow this save"
-// on load it means: "a save preceeded this load"
-//
-ctx_xfer:
- // set context base address
- mov $r1 0xa04
- shl b32 $r1 6
- iowr I[$r1 + 0x000] $r15// MEM_BASE
- bra not $p1 #ctx_xfer_not_load
- call #ctx_redswitch
- ctx_xfer_not_load:
-
- // strands
- mov $r1 0x4afc
- sethi $r1 0x20000
- mov $r2 0xc
- iowr I[$r1] $r2 // STRAND_CMD(0x3f) = 0x0c
- call #strand_wait
- mov $r2 0x47fc
- sethi $r2 0x20000
- iowr I[$r2] $r0 // STRAND_FIRST_GENE(0x3f) = 0x00
- xbit $r2 $flags $p1
- add b32 $r2 3
- iowr I[$r1] $r2 // STRAND_CMD(0x3f) = 0x03/0x04 (SAVE/LOAD)
-
- // mmio context
- xbit $r10 $flags $p1 // direction
- or $r10 2 // first
- mov $r11 0x0000
- sethi $r11 0x500000
- ld b32 $r12 D[$r0 + #gpc_id]
- shl b32 $r12 15
- add b32 $r11 $r12 // base = NV_PGRAPH_GPCn
- ld b32 $r12 D[$r0 + #gpc_mmio_list_head]
- ld b32 $r13 D[$r0 + #gpc_mmio_list_tail]
- mov $r14 0 // not multi
- call #mmctx_xfer
-
- // per-TPC mmio context
- xbit $r10 $flags $p1 // direction
- or $r10 4 // last
- mov $r11 0x4000
- sethi $r11 0x500000 // base = NV_PGRAPH_GPC0_TPC0
- ld b32 $r12 D[$r0 + #gpc_id]
- shl b32 $r12 15
- add b32 $r11 $r12 // base = NV_PGRAPH_GPCn_TPC0
- ld b32 $r12 D[$r0 + #tpc_mmio_list_head]
- ld b32 $r13 D[$r0 + #tpc_mmio_list_tail]
- ld b32 $r15 D[$r0 + #tpc_mask]
- mov $r14 0x800 // stride = 0x800
- call #mmctx_xfer
-
- // wait for strands to finish
- call #strand_wait
-
- // if load, or a save without a load following, do some
- // unknown stuff that's done after finishing a block of
- // strand commands
- bra $p1 #ctx_xfer_post
- bra not $p2 #ctx_xfer_done
- ctx_xfer_post:
- mov $r1 0x4afc
- sethi $r1 0x20000
- mov $r2 0xd
- iowr I[$r1] $r2 // STRAND_CMD(0x3f) = 0x0d
- call #strand_wait
-
- // mark completion in HUB's barrier
- ctx_xfer_done:
- call #hub_barrier_done
- ret
-
+#include "com.fuc"
+#include "gpc.fuc"
.align 256
+#undef INCLUDE_CODE
0x0089d000,
0x081887f1,
0xd00684b6,
-/* 0x00e2: wait_done_wait_donez */
+/* 0x00e2: wait_donez_ne */
0x87f1008a,
0x84b60400,
0x0088cf06,
0x87f10089,
0x84b60818,
0x008ad006,
-/* 0x011c: wait_done_wait_doneo */
+/* 0x011c: wait_doneo_e */
0x040087f1,
0xcf0684b6,
0x8aff0088,
-/* fuc microcode for nve0 PGRAPH/GPC
- *
- * Copyright 2011 Red Hat Inc.
+/*
+ * Copyright 2013 Red Hat Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
- * Authors: Ben Skeggs
+ * Authors: Ben Skeggs <bskeggs@redhat.com>
*/
-/* To build:
- * m4 nve0_grgpc.fuc | envyas -a -w -m fuc -V nva3 -o nve0_grgpc.fuc.h
- */
-
-/* TODO
- * - bracket certain functions with scratch writes, useful for debugging
- * - watchdog timer around ctx operations
- */
+#define NVGK
+#include "macros.fuc"
.section #nve0_grgpc_data
-include(`nve0.fuc')
-gpc_id: .b32 0
-gpc_mmio_list_head: .b32 0
-gpc_mmio_list_tail: .b32 0
-
-tpc_count: .b32 0
-tpc_mask: .b32 0
-tpc_mmio_list_head: .b32 0
-tpc_mmio_list_tail: .b32 0
+#define INCLUDE_DATA
+#include "com.fuc"
+#include "gpc.fuc"
-cmd_queue: queue_init
-
-// chipset descriptions
chipsets:
.b8 0xe4 0 0 0
.b16 #nve4_gpc_mmio_head
mmctx_data(0x000770, 1)
mmctx_data(0x000778, 2)
nvf0_tpc_mmio_tail:
+#undef INCLUDE_DATA
.section #nve0_grgpc_code
+#define INCLUDE_CODE
bra #init
-define(`include_code')
-include(`nve0.fuc')
-
-// reports an exception to the host
-//
-// In: $r15 error code (see nve0.fuc)
-//
-error:
- push $r14
- mov $r14 -0x67ec // 0x9814
- sethi $r14 0x400000
- call #nv_wr32 // HUB_CTXCTL_CC_SCRATCH[5] = error code
- add b32 $r14 0x41c
- mov $r15 1
- call #nv_wr32 // HUB_CTXCTL_INTR_UP_SET
- pop $r14
- ret
-
-// GPC fuc initialisation, executed by triggering ucode start, will
-// fall through to main loop after completion.
-//
-// Input:
-// CC_SCRATCH[0]: chipset (PMC_BOOT_0 read returns 0x0bad0bad... sigh)
-// CC_SCRATCH[1]: context base
-//
-// Output:
-// CC_SCRATCH[0]:
-// 31:31: set to signal completion
-// CC_SCRATCH[1]:
-// 31:0: GPC context size
-//
-init:
- clear b32 $r0
- mov $sp $r0
-
- // enable fifo access
- mov $r1 0x1200
- mov $r2 2
- iowr I[$r1 + 0x000] $r2 // FIFO_ENABLE
-
- // setup i0 handler, and route all interrupts to it
- mov $r1 #ih
- mov $iv0 $r1
- mov $r1 0x400
- iowr I[$r1 + 0x300] $r0 // INTR_DISPATCH
-
- // enable fifo interrupt
- mov $r2 4
- iowr I[$r1 + 0x000] $r2 // INTR_EN_SET
-
- // enable interrupts
- bset $flags ie0
-
- // figure out which GPC we are, and how many TPCs we have
- mov $r1 0x608
- shl b32 $r1 6
- iord $r2 I[$r1 + 0x000] // UNITS
- mov $r3 1
- and $r2 0x1f
- shl b32 $r3 $r2
- sub b32 $r3 1
- st b32 D[$r0 + #tpc_count] $r2
- st b32 D[$r0 + #tpc_mask] $r3
- add b32 $r1 0x400
- iord $r2 I[$r1 + 0x000] // MYINDEX
- st b32 D[$r0 + #gpc_id] $r2
-
- // find context data for this chipset
- mov $r2 0x800
- shl b32 $r2 6
- iord $r2 I[$r2 + 0x000] // CC_SCRATCH[0]
- mov $r1 #chipsets - 12
- init_find_chipset:
- add b32 $r1 12
- ld b32 $r3 D[$r1 + 0x00]
- cmpu b32 $r3 $r2
- bra e #init_context
- cmpu b32 $r3 0
- bra ne #init_find_chipset
- // unknown chipset
- ret
-
- // initialise context base, and size tracking
- init_context:
- mov $r2 0x800
- shl b32 $r2 6
- iord $r2 I[$r2 + 0x100] // CC_SCRATCH[1], initial base
- clear b32 $r3 // track GPC context size here
-
- // set mmctx base addresses now so we don't have to do it later,
- // they don't currently ever change
- mov $r4 0x700
- shl b32 $r4 6
- shr b32 $r5 $r2 8
- iowr I[$r4 + 0x000] $r5 // MMCTX_SAVE_SWBASE
- iowr I[$r4 + 0x100] $r5 // MMCTX_LOAD_SWBASE
-
- // calculate GPC mmio context size, store the chipset-specific
- // mmio list pointers somewhere we can get at them later without
- // re-parsing the chipset list
- clear b32 $r14
- clear b32 $r15
- ld b16 $r14 D[$r1 + 4]
- ld b16 $r15 D[$r1 + 6]
- st b16 D[$r0 + #gpc_mmio_list_head] $r14
- st b16 D[$r0 + #gpc_mmio_list_tail] $r15
- call #mmctx_size
- add b32 $r2 $r15
- add b32 $r3 $r15
-
- // calculate per-TPC mmio context size, store the list pointers
- ld b16 $r14 D[$r1 + 8]
- ld b16 $r15 D[$r1 + 10]
- st b16 D[$r0 + #tpc_mmio_list_head] $r14
- st b16 D[$r0 + #tpc_mmio_list_tail] $r15
- call #mmctx_size
- ld b32 $r14 D[$r0 + #tpc_count]
- mulu $r14 $r15
- add b32 $r2 $r14
- add b32 $r3 $r14
-
- // round up base/size to 256 byte boundary (for strand SWBASE)
- add b32 $r4 0x1300
- shr b32 $r3 2
- iowr I[$r4 + 0x000] $r3 // MMCTX_LOAD_COUNT, wtf for?!?
- shr b32 $r2 8
- shr b32 $r3 6
- add b32 $r2 1
- add b32 $r3 1
- shl b32 $r2 8
- shl b32 $r3 8
-
- // calculate size of strand context data
- mov b32 $r15 $r2
- call #strand_ctx_init
- add b32 $r3 $r15
-
- // save context size, and tell HUB we're done
- mov $r1 0x800
- shl b32 $r1 6
- iowr I[$r1 + 0x100] $r3 // CC_SCRATCH[1] = context size
- add b32 $r1 0x800
- clear b32 $r2
- bset $r2 31
- iowr I[$r1 + 0x000] $r2 // CC_SCRATCH[0] |= 0x80000000
-
-// Main program loop, very simple, sleeps until woken up by the interrupt
-// handler, pulls a command from the queue and executes its handler
-//
-main:
- bset $flags $p0
- sleep $p0
- mov $r13 #cmd_queue
- call #queue_get
- bra $p1 #main
-
- // 0x0000-0x0003 are all context transfers
- cmpu b32 $r14 0x04
- bra nc #main_not_ctx_xfer
- // fetch $flags and mask off $p1/$p2
- mov $r1 $flags
- mov $r2 0x0006
- not b32 $r2
- and $r1 $r2
- // set $p1/$p2 according to transfer type
- shl b32 $r14 1
- or $r1 $r14
- mov $flags $r1
- // transfer context data
- call #ctx_xfer
- bra #main
-
- main_not_ctx_xfer:
- shl b32 $r15 $r14 16
- or $r15 E_BAD_COMMAND
- call #error
- bra #main
-
-// interrupt handler
-ih:
- push $r8
- mov $r8 $flags
- push $r8
- push $r9
- push $r10
- push $r11
- push $r13
- push $r14
- push $r15
-
- // incoming fifo command?
- iord $r10 I[$r0 + 0x200] // INTR
- and $r11 $r10 0x00000004
- bra e #ih_no_fifo
- // queue incoming fifo command for later processing
- mov $r11 0x1900
- mov $r13 #cmd_queue
- iord $r14 I[$r11 + 0x100] // FIFO_CMD
- iord $r15 I[$r11 + 0x000] // FIFO_DATA
- call #queue_put
- add b32 $r11 0x400
- mov $r14 1
- iowr I[$r11 + 0x000] $r14 // FIFO_ACK
-
- // ack, and wake up main()
- ih_no_fifo:
- iowr I[$r0 + 0x100] $r10 // INTR_ACK
-
- pop $r15
- pop $r14
- pop $r13
- pop $r11
- pop $r10
- pop $r9
- pop $r8
- mov $flags $r8
- pop $r8
- bclr $flags $p0
- iret
-
-// Set this GPC's bit in HUB_BAR, used to signal completion of various
-// activities to the HUB fuc
-//
-hub_barrier_done:
- mov $r15 1
- ld b32 $r14 D[$r0 + #gpc_id]
- shl b32 $r15 $r14
- mov $r14 -0x6be8 // 0x409418 - HUB_BAR_SET
- sethi $r14 0x400000
- call #nv_wr32
- ret
-
-// Disables various things, waits a bit, and re-enables them..
-//
-// Not sure how exactly this helps, perhaps "ENABLE" is not such a
-// good description for the bits we turn off? Anyways, without this,
-// funny things happen.
-//
-ctx_redswitch:
- mov $r14 0x614
- shl b32 $r14 6
- mov $r15 0x020
- iowr I[$r14] $r15 // GPC_RED_SWITCH = POWER
- mov $r15 8
- ctx_redswitch_delay:
- sub b32 $r15 1
- bra ne #ctx_redswitch_delay
- mov $r15 0xa20
- iowr I[$r14] $r15 // GPC_RED_SWITCH = UNK11, ENABLE, POWER
- ret
-
-// Transfer GPC context data between GPU and storage area
-//
-// In: $r15 context base address
-// $p1 clear on save, set on load
-// $p2 set if opposite direction done/will be done, so:
-// on save it means: "a load will follow this save"
-// on load it means: "a save preceeded this load"
-//
-ctx_xfer:
- // set context base address
- mov $r1 0xa04
- shl b32 $r1 6
- iowr I[$r1 + 0x000] $r15// MEM_BASE
- bra not $p1 #ctx_xfer_not_load
- call #ctx_redswitch
- ctx_xfer_not_load:
-
- // strands
- mov $r1 0x4afc
- sethi $r1 0x20000
- mov $r2 0xc
- iowr I[$r1] $r2 // STRAND_CMD(0x3f) = 0x0c
- call #strand_wait
- mov $r2 0x47fc
- sethi $r2 0x20000
- iowr I[$r2] $r0 // STRAND_FIRST_GENE(0x3f) = 0x00
- xbit $r2 $flags $p1
- add b32 $r2 3
- iowr I[$r1] $r2 // STRAND_CMD(0x3f) = 0x03/0x04 (SAVE/LOAD)
-
- // mmio context
- xbit $r10 $flags $p1 // direction
- or $r10 2 // first
- mov $r11 0x0000
- sethi $r11 0x500000
- ld b32 $r12 D[$r0 + #gpc_id]
- shl b32 $r12 15
- add b32 $r11 $r12 // base = NV_PGRAPH_GPCn
- ld b32 $r12 D[$r0 + #gpc_mmio_list_head]
- ld b32 $r13 D[$r0 + #gpc_mmio_list_tail]
- mov $r14 0 // not multi
- call #mmctx_xfer
-
- // per-TPC mmio context
- xbit $r10 $flags $p1 // direction
- or $r10 4 // last
- mov $r11 0x4000
- sethi $r11 0x500000 // base = NV_PGRAPH_GPC0_TPC0
- ld b32 $r12 D[$r0 + #gpc_id]
- shl b32 $r12 15
- add b32 $r11 $r12 // base = NV_PGRAPH_GPCn_TPC0
- ld b32 $r12 D[$r0 + #tpc_mmio_list_head]
- ld b32 $r13 D[$r0 + #tpc_mmio_list_tail]
- ld b32 $r15 D[$r0 + #tpc_mask]
- mov $r14 0x800 // stride = 0x800
- call #mmctx_xfer
-
- // wait for strands to finish
- call #strand_wait
-
- // if load, or a save without a load following, do some
- // unknown stuff that's done after finishing a block of
- // strand commands
- bra $p1 #ctx_xfer_post
- bra not $p2 #ctx_xfer_done
- ctx_xfer_post:
- mov $r1 0x4afc
- sethi $r1 0x20000
- mov $r2 0xd
- iowr I[$r1] $r2 // STRAND_CMD(0x3f) = 0x0d
- call #strand_wait
-
- // mark completion in HUB's barrier
- ctx_xfer_done:
- call #hub_barrier_done
- ret
-
+#include "com.fuc"
+#include "gpc.fuc"
.align 256
+#undef INCLUDE_CODE
0x0089d000,
0x081887f1,
0xd00684b6,
-/* 0x00e2: wait_done_wait_donez */
+/* 0x00e2: wait_donez_ne */
0x87f1008a,
0x84b60400,
0x0088cf06,
0x87f10089,
0x84b60818,
0x008ad006,
-/* 0x011c: wait_done_wait_doneo */
+/* 0x011c: wait_doneo_e */
0x040087f1,
0xcf0684b6,
0x8aff0088,
--- /dev/null
+/* fuc microcode for nvc0 PGRAPH/HUB
+ *
+ * Copyright 2011 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Ben Skeggs
+ */
+
+#ifdef INCLUDE_DATA
+gpc_count: .b32 0
+rop_count: .b32 0
+cmd_queue: queue_init
+hub_mmio_list_head: .b32 0
+hub_mmio_list_tail: .b32 0
+
+ctx_current: .b32 0
+
+.align 256
+chan_data:
+chan_mmio_count: .b32 0
+chan_mmio_address: .b32 0
+
+.align 256
+xfer_data: .skip 256
+
+#endif
+
+#ifdef INCLUDE_CODE
+// reports an exception to the host
+//
+// In: $r15 error code (see nvc0.fuc)
+//
+error:
+ push $r14
+ mov $r14 0x814
+ shl b32 $r14 6
+ iowr I[$r14 + 0x000] $r15 // CC_SCRATCH[5] = error code
+ mov $r14 0xc1c
+ shl b32 $r14 6
+ mov $r15 1
+ iowr I[$r14 + 0x000] $r15 // INTR_UP_SET
+ pop $r14
+ ret
+
+// HUB fuc initialisation, executed by triggering ucode start, will
+// fall through to main loop after completion.
+//
+// Input:
+// CC_SCRATCH[0]: chipset (PMC_BOOT_0 read returns 0x0bad0bad... sigh)
+//
+// Output:
+// CC_SCRATCH[0]:
+// 31:31: set to signal completion
+// CC_SCRATCH[1]:
+// 31:0: total PGRAPH context size
+//
+init:
+ clear b32 $r0
+ mov $sp $r0
+ mov $xdbase $r0
+
+ // enable fifo access
+ mov $r1 0x1200
+ mov $r2 2
+ iowr I[$r1 + 0x000] $r2 // FIFO_ENABLE
+
+ // setup i0 handler, and route all interrupts to it
+ mov $r1 #ih
+ mov $iv0 $r1
+ mov $r1 0x400
+ iowr I[$r1 + 0x300] $r0 // INTR_DISPATCH
+
+ // route HUB_CHANNEL_SWITCH to fuc interrupt 8
+ mov $r3 0x404
+ shl b32 $r3 6
+ mov $r2 0x2003 // { HUB_CHANNEL_SWITCH, ZERO } -> intr 8
+ iowr I[$r3 + 0x000] $r2
+
+ // not sure what these are, route them because NVIDIA does, and
+ // the IRQ handler will signal the host if we ever get one.. we
+ // may find out if/why we need to handle these if so..
+ //
+ mov $r2 0x2004
+ iowr I[$r3 + 0x004] $r2 // { 0x04, ZERO } -> intr 9
+ mov $r2 0x200b
+ iowr I[$r3 + 0x008] $r2 // { 0x0b, ZERO } -> intr 10
+ mov $r2 0x200c
+ iowr I[$r3 + 0x01c] $r2 // { 0x0c, ZERO } -> intr 15
+
+ // enable all INTR_UP interrupts
+ mov $r2 0xc24
+ shl b32 $r2 6
+ not b32 $r3 $r0
+ iowr I[$r2] $r3
+
+ // enable fifo, ctxsw, 9, 10, 15 interrupts
+ mov $r2 -0x78fc // 0x8704
+ sethi $r2 0
+ iowr I[$r1 + 0x000] $r2 // INTR_EN_SET
+
+ // fifo level triggered, rest edge
+ sub b32 $r1 0x100
+ mov $r2 4
+ iowr I[$r1] $r2
+
+ // enable interrupts
+ bset $flags ie0
+
+ // fetch enabled GPC/ROP counts
+ mov $r14 -0x69fc // 0x409604
+ sethi $r14 0x400000
+ call #nv_rd32
+ extr $r1 $r15 16:20
+ st b32 D[$r0 + #rop_count] $r1
+ and $r15 0x1f
+ st b32 D[$r0 + #gpc_count] $r15
+
+ // set BAR_REQMASK to GPC mask
+ mov $r1 1
+ shl b32 $r1 $r15
+ sub b32 $r1 1
+ mov $r2 0x40c
+ shl b32 $r2 6
+ iowr I[$r2 + 0x000] $r1
+ iowr I[$r2 + 0x100] $r1
+
+ // find context data for this chipset
+ mov $r2 0x800
+ shl b32 $r2 6
+ iord $r2 I[$r2 + 0x000] // CC_SCRATCH[0]
+ mov $r15 #chipsets - 8
+ init_find_chipset:
+ add b32 $r15 8
+ ld b32 $r3 D[$r15 + 0x00]
+ cmpu b32 $r3 $r2
+ bra e #init_context
+ cmpu b32 $r3 0
+ bra ne #init_find_chipset
+ // unknown chipset
+ ret
+
+ // context size calculation, reserve first 256 bytes for use by fuc
+ init_context:
+ mov $r1 256
+
+ // calculate size of mmio context data
+ ld b16 $r14 D[$r15 + 4]
+ ld b16 $r15 D[$r15 + 6]
+ sethi $r14 0
+ st b32 D[$r0 + #hub_mmio_list_head] $r14
+ st b32 D[$r0 + #hub_mmio_list_tail] $r15
+ call #mmctx_size
+
+ // set mmctx base addresses now so we don't have to do it later,
+ // they don't (currently) ever change
+ mov $r3 0x700
+ shl b32 $r3 6
+ shr b32 $r4 $r1 8
+ iowr I[$r3 + 0x000] $r4 // MMCTX_SAVE_SWBASE
+ iowr I[$r3 + 0x100] $r4 // MMCTX_LOAD_SWBASE
+ add b32 $r3 0x1300
+ add b32 $r1 $r15
+ shr b32 $r15 2
+ iowr I[$r3 + 0x000] $r15 // MMCTX_LOAD_COUNT, wtf for?!?
+
+ // strands, base offset needs to be aligned to 256 bytes
+ shr b32 $r1 8
+ add b32 $r1 1
+ shl b32 $r1 8
+ mov b32 $r15 $r1
+ call #strand_ctx_init
+ add b32 $r1 $r15
+
+ // initialise each GPC in sequence by passing in the offset of its
+ // context data in GPCn_CC_SCRATCH[1], and starting its FUC (which
+ // has previously been uploaded by the host) running.
+ //
+ // the GPC fuc init sequence will set GPCn_CC_SCRATCH[0] bit 31
+ // when it has completed, and return the size of its context data
+ // in GPCn_CC_SCRATCH[1]
+ //
+ ld b32 $r3 D[$r0 + #gpc_count]
+ mov $r4 0x2000
+ sethi $r4 0x500000
+ init_gpc:
+ // setup, and start GPC ucode running
+ add b32 $r14 $r4 0x804
+ mov b32 $r15 $r1
+ call #nv_wr32 // CC_SCRATCH[1] = ctx offset
+ add b32 $r14 $r4 0x800
+ mov b32 $r15 $r2
+ call #nv_wr32 // CC_SCRATCH[0] = chipset
+ add b32 $r14 $r4 0x10c
+ clear b32 $r15
+ call #nv_wr32
+ add b32 $r14 $r4 0x104
+ call #nv_wr32 // ENTRY
+ add b32 $r14 $r4 0x100
+ mov $r15 2 // CTRL_START_TRIGGER
+ call #nv_wr32 // CTRL
+
+ // wait for it to complete, and adjust context size
+ add b32 $r14 $r4 0x800
+ init_gpc_wait:
+ call #nv_rd32
+ xbit $r15 $r15 31
+ bra e #init_gpc_wait
+ add b32 $r14 $r4 0x804
+ call #nv_rd32
+ add b32 $r1 $r15
+
+ // next!
+ add b32 $r4 0x8000
+ sub b32 $r3 1
+ bra ne #init_gpc
+
+ // save context size, and tell host we're ready
+ mov $r2 0x800
+ shl b32 $r2 6
+ iowr I[$r2 + 0x100] $r1 // CC_SCRATCH[1] = context size
+ add b32 $r2 0x800
+ clear b32 $r1
+ bset $r1 31
+ iowr I[$r2 + 0x000] $r1 // CC_SCRATCH[0] |= 0x80000000
+
+// Main program loop, very simple, sleeps until woken up by the interrupt
+// handler, pulls a command from the queue and executes its handler
+//
+main:
+ // sleep until we have something to do
+ bset $flags $p0
+ sleep $p0
+ mov $r13 #cmd_queue
+ call #queue_get
+ bra $p1 #main
+
+ // context switch, requested by GPU?
+ cmpu b32 $r14 0x4001
+ bra ne #main_not_ctx_switch
+ trace_set(T_AUTO)
+ mov $r1 0xb00
+ shl b32 $r1 6
+ iord $r2 I[$r1 + 0x100] // CHAN_NEXT
+ iord $r1 I[$r1 + 0x000] // CHAN_CUR
+
+ xbit $r3 $r1 31
+ bra e #chsw_no_prev
+ xbit $r3 $r2 31
+ bra e #chsw_prev_no_next
+ push $r2
+ mov b32 $r2 $r1
+ trace_set(T_SAVE)
+ bclr $flags $p1
+ bset $flags $p2
+ call #ctx_xfer
+ trace_clr(T_SAVE);
+ pop $r2
+ trace_set(T_LOAD);
+ bset $flags $p1
+ call #ctx_xfer
+ trace_clr(T_LOAD);
+ bra #chsw_done
+ chsw_prev_no_next:
+ push $r2
+ mov b32 $r2 $r1
+ bclr $flags $p1
+ bclr $flags $p2
+ call #ctx_xfer
+ pop $r2
+ mov $r1 0xb00
+ shl b32 $r1 6
+ iowr I[$r1] $r2
+ bra #chsw_done
+ chsw_no_prev:
+ xbit $r3 $r2 31
+ bra e #chsw_done
+ bset $flags $p1
+ bclr $flags $p2
+ call #ctx_xfer
+
+ // ack the context switch request
+ chsw_done:
+ mov $r1 0xb0c
+ shl b32 $r1 6
+ mov $r2 1
+ iowr I[$r1 + 0x000] $r2 // 0x409b0c
+ trace_clr(T_AUTO)
+ bra #main
+
+ // request to set current channel? (*not* a context switch)
+ main_not_ctx_switch:
+ cmpu b32 $r14 0x0001
+ bra ne #main_not_ctx_chan
+ mov b32 $r2 $r15
+ call #ctx_chan
+ bra #main_done
+
+ // request to store current channel context?
+ main_not_ctx_chan:
+ cmpu b32 $r14 0x0002
+ bra ne #main_not_ctx_save
+ trace_set(T_SAVE)
+ bclr $flags $p1
+ bclr $flags $p2
+ call #ctx_xfer
+ trace_clr(T_SAVE)
+ bra #main_done
+
+ main_not_ctx_save:
+ shl b32 $r15 $r14 16
+ or $r15 E_BAD_COMMAND
+ call #error
+ bra #main
+
+ main_done:
+ mov $r1 0x820
+ shl b32 $r1 6
+ clear b32 $r2
+ bset $r2 31
+ iowr I[$r1 + 0x000] $r2 // CC_SCRATCH[0] |= 0x80000000
+ bra #main
+
+// interrupt handler
+ih:
+ push $r8
+ mov $r8 $flags
+ push $r8
+ push $r9
+ push $r10
+ push $r11
+ push $r13
+ push $r14
+ push $r15
+
+ // incoming fifo command?
+ iord $r10 I[$r0 + 0x200] // INTR
+ and $r11 $r10 0x00000004
+ bra e #ih_no_fifo
+ // queue incoming fifo command for later processing
+ mov $r11 0x1900
+ mov $r13 #cmd_queue
+ iord $r14 I[$r11 + 0x100] // FIFO_CMD
+ iord $r15 I[$r11 + 0x000] // FIFO_DATA
+ call #queue_put
+ add b32 $r11 0x400
+ mov $r14 1
+ iowr I[$r11 + 0x000] $r14 // FIFO_ACK
+
+ // context switch request?
+ ih_no_fifo:
+ and $r11 $r10 0x00000100
+ bra e #ih_no_ctxsw
+ // enqueue a context switch for later processing
+ mov $r13 #cmd_queue
+ mov $r14 0x4001
+ call #queue_put
+
+ // anything we didn't handle, bring it to the host's attention
+ ih_no_ctxsw:
+ mov $r11 0x104
+ not b32 $r11
+ and $r11 $r10 $r11
+ bra e #ih_no_other
+ mov $r10 0xc1c
+ shl b32 $r10 6
+ iowr I[$r10] $r11 // INTR_UP_SET
+
+ // ack, and wake up main()
+ ih_no_other:
+ iowr I[$r0 + 0x100] $r10 // INTR_ACK
+
+ pop $r15
+ pop $r14
+ pop $r13
+ pop $r11
+ pop $r10
+ pop $r9
+ pop $r8
+ mov $flags $r8
+ pop $r8
+ bclr $flags $p0
+ iret
+
+#ifdef NVGF
+// Not real sure, but, MEM_CMD 7 will hang forever if this isn't done
+ctx_4160s:
+ mov $r14 0x4160
+ sethi $r14 0x400000
+ mov $r15 1
+ call #nv_wr32
+ ctx_4160s_wait:
+ call #nv_rd32
+ xbit $r15 $r15 4
+ bra e #ctx_4160s_wait
+ ret
+
+// Without clearing again at end of xfer, some things cause PGRAPH
+// to hang with STATUS=0x00000007 until it's cleared.. fbcon can
+// still function with it set however...
+ctx_4160c:
+ mov $r14 0x4160
+ sethi $r14 0x400000
+ clear b32 $r15
+ call #nv_wr32
+ ret
+#endif
+
+// Again, not real sure
+//
+// In: $r15 value to set 0x404170 to
+//
+ctx_4170s:
+ mov $r14 0x4170
+ sethi $r14 0x400000
+ or $r15 0x10
+ call #nv_wr32
+ ret
+
+// Waits for a ctx_4170s() call to complete
+//
+ctx_4170w:
+ mov $r14 0x4170
+ sethi $r14 0x400000
+ call #nv_rd32
+ and $r15 0x10
+ bra ne #ctx_4170w
+ ret
+
+// Disables various things, waits a bit, and re-enables them..
+//
+// Not sure how exactly this helps, perhaps "ENABLE" is not such a
+// good description for the bits we turn off? Anyways, without this,
+// funny things happen.
+//
+ctx_redswitch:
+ mov $r14 0x614
+ shl b32 $r14 6
+ mov $r15 0x270
+ iowr I[$r14] $r15 // HUB_RED_SWITCH = ENABLE_GPC, POWER_ALL
+ mov $r15 8
+ ctx_redswitch_delay:
+ sub b32 $r15 1
+ bra ne #ctx_redswitch_delay
+ mov $r15 0x770
+ iowr I[$r14] $r15 // HUB_RED_SWITCH = ENABLE_ALL, POWER_ALL
+ ret
+
+// Not a clue what this is for, except that unless the value is 0x10, the
+// strand context is saved (and presumably restored) incorrectly..
+//
+// In: $r15 value to set to (0x00/0x10 are used)
+//
+ctx_86c:
+ mov $r14 0x86c
+ shl b32 $r14 6
+ iowr I[$r14] $r15 // HUB(0x86c) = val
+ mov $r14 -0x75ec
+ sethi $r14 0x400000
+ call #nv_wr32 // ROP(0xa14) = val
+ mov $r14 -0x5794
+ sethi $r14 0x410000
+ call #nv_wr32 // GPC(0x86c) = val
+ ret
+
+// ctx_load - load's a channel's ctxctl data, and selects its vm
+//
+// In: $r2 channel address
+//
+ctx_load:
+ trace_set(T_CHAN)
+
+ // switch to channel, somewhat magic in parts..
+ mov $r10 12 // DONE_UNK12
+ call #wait_donez
+ mov $r1 0xa24
+ shl b32 $r1 6
+ iowr I[$r1 + 0x000] $r0 // 0x409a24
+ mov $r3 0xb00
+ shl b32 $r3 6
+ iowr I[$r3 + 0x100] $r2 // CHAN_NEXT
+ mov $r1 0xa0c
+ shl b32 $r1 6
+ mov $r4 7
+ iowr I[$r1 + 0x000] $r2 // MEM_CHAN
+ iowr I[$r1 + 0x100] $r4 // MEM_CMD
+ ctx_chan_wait_0:
+ iord $r4 I[$r1 + 0x100]
+ and $r4 0x1f
+ bra ne #ctx_chan_wait_0
+ iowr I[$r3 + 0x000] $r2 // CHAN_CUR
+
+ // load channel header, fetch PGRAPH context pointer
+ mov $xtargets $r0
+ bclr $r2 31
+ shl b32 $r2 4
+ add b32 $r2 2
+
+ trace_set(T_LCHAN)
+ mov $r1 0xa04
+ shl b32 $r1 6
+ iowr I[$r1 + 0x000] $r2 // MEM_BASE
+ mov $r1 0xa20
+ shl b32 $r1 6
+ mov $r2 0x0002
+ sethi $r2 0x80000000
+ iowr I[$r1 + 0x000] $r2 // MEM_TARGET = vram
+ mov $r1 0x10 // chan + 0x0210
+ mov $r2 #xfer_data
+ sethi $r2 0x00020000 // 16 bytes
+ xdld $r1 $r2
+ xdwait
+ trace_clr(T_LCHAN)
+
+ // update current context
+ ld b32 $r1 D[$r0 + #xfer_data + 4]
+ shl b32 $r1 24
+ ld b32 $r2 D[$r0 + #xfer_data + 0]
+ shr b32 $r2 8
+ or $r1 $r2
+ st b32 D[$r0 + #ctx_current] $r1
+
+ // set transfer base to start of context, and fetch context header
+ trace_set(T_LCTXH)
+ mov $r2 0xa04
+ shl b32 $r2 6
+ iowr I[$r2 + 0x000] $r1 // MEM_BASE
+ mov $r2 1
+ mov $r1 0xa20
+ shl b32 $r1 6
+ iowr I[$r1 + 0x000] $r2 // MEM_TARGET = vm
+ mov $r1 #chan_data
+ sethi $r1 0x00060000 // 256 bytes
+ xdld $r0 $r1
+ xdwait
+ trace_clr(T_LCTXH)
+
+ trace_clr(T_CHAN)
+ ret
+
+// ctx_chan - handler for HUB_SET_CHAN command, will set a channel as
+// the active channel for ctxctl, but not actually transfer
+// any context data. intended for use only during initial
+// context construction.
+//
+// In: $r2 channel address
+//
+ctx_chan:
+#ifdef NVGF
+ call #ctx_4160s
+#endif
+ call #ctx_load
+ mov $r10 12 // DONE_UNK12
+ call #wait_donez
+ mov $r1 0xa10
+ shl b32 $r1 6
+ mov $r2 5
+ iowr I[$r1 + 0x000] $r2 // MEM_CMD = 5 (???)
+ ctx_chan_wait:
+ iord $r2 I[$r1 + 0x000]
+ or $r2 $r2
+ bra ne #ctx_chan_wait
+#ifdef NVGF
+ call #ctx_4160c
+#endif
+ ret
+
+// Execute per-context state overrides list
+//
+// Only executed on the first load of a channel. Might want to look into
+// removing this and having the host directly modify the channel's context
+// to change this state... The nouveau DRM already builds this list as
+// it's definitely needed for NVIDIA's, so we may as well use it for now
+//
+// Input: $r1 mmio list length
+//
+ctx_mmio_exec:
+ // set transfer base to be the mmio list
+ ld b32 $r3 D[$r0 + #chan_mmio_address]
+ mov $r2 0xa04
+ shl b32 $r2 6
+ iowr I[$r2 + 0x000] $r3 // MEM_BASE
+
+ clear b32 $r3
+ ctx_mmio_loop:
+ // fetch next 256 bytes of mmio list if necessary
+ and $r4 $r3 0xff
+ bra ne #ctx_mmio_pull
+ mov $r5 #xfer_data
+ sethi $r5 0x00060000 // 256 bytes
+ xdld $r3 $r5
+ xdwait
+
+ // execute a single list entry
+ ctx_mmio_pull:
+ ld b32 $r14 D[$r4 + #xfer_data + 0x00]
+ ld b32 $r15 D[$r4 + #xfer_data + 0x04]
+ call #nv_wr32
+
+ // next!
+ add b32 $r3 8
+ sub b32 $r1 1
+ bra ne #ctx_mmio_loop
+
+ // set transfer base back to the current context
+ ctx_mmio_done:
+ ld b32 $r3 D[$r0 + #ctx_current]
+ iowr I[$r2 + 0x000] $r3 // MEM_BASE
+
+ // disable the mmio list now, we don't need/want to execute it again
+ st b32 D[$r0 + #chan_mmio_count] $r0
+ mov $r1 #chan_data
+ sethi $r1 0x00060000 // 256 bytes
+ xdst $r0 $r1
+ xdwait
+ ret
+
+// Transfer HUB context data between GPU and storage area
+//
+// In: $r2 channel address
+// $p1 clear on save, set on load
+// $p2 set if opposite direction done/will be done, so:
+// on save it means: "a load will follow this save"
+// on load it means: "a save preceeded this load"
+//
+ctx_xfer:
+ // according to mwk, some kind of wait for idle
+ mov $r15 0xc00
+ shl b32 $r15 6
+ mov $r14 4
+ iowr I[$r15 + 0x200] $r14
+ ctx_xfer_idle:
+ iord $r14 I[$r15 + 0x000]
+ and $r14 0x2000
+ bra ne #ctx_xfer_idle
+
+ bra not $p1 #ctx_xfer_pre
+ bra $p2 #ctx_xfer_pre_load
+ ctx_xfer_pre:
+ mov $r15 0x10
+ call #ctx_86c
+#ifdef NVGF
+ call #ctx_4160s
+#endif
+ bra not $p1 #ctx_xfer_exec
+
+ ctx_xfer_pre_load:
+ mov $r15 2
+ call #ctx_4170s
+ call #ctx_4170w
+ call #ctx_redswitch
+ clear b32 $r15
+ call #ctx_4170s
+ call #ctx_load
+
+ // fetch context pointer, and initiate xfer on all GPCs
+ ctx_xfer_exec:
+ ld b32 $r1 D[$r0 + #ctx_current]
+ mov $r2 0x414
+ shl b32 $r2 6
+ iowr I[$r2 + 0x000] $r0 // BAR_STATUS = reset
+ mov $r14 -0x5b00
+ sethi $r14 0x410000
+ mov b32 $r15 $r1
+ call #nv_wr32 // GPC_BCAST_WRCMD_DATA = ctx pointer
+ add b32 $r14 4
+ xbit $r15 $flags $p1
+ xbit $r2 $flags $p2
+ shl b32 $r2 1
+ or $r15 $r2
+ call #nv_wr32 // GPC_BCAST_WRCMD_CMD = GPC_XFER(type)
+
+ // strands
+ mov $r1 0x4afc
+ sethi $r1 0x20000
+ mov $r2 0xc
+ iowr I[$r1] $r2 // STRAND_CMD(0x3f) = 0x0c
+ call #strand_wait
+ mov $r2 0x47fc
+ sethi $r2 0x20000
+ iowr I[$r2] $r0 // STRAND_FIRST_GENE(0x3f) = 0x00
+ xbit $r2 $flags $p1
+ add b32 $r2 3
+ iowr I[$r1] $r2 // STRAND_CMD(0x3f) = 0x03/0x04 (SAVE/LOAD)
+
+ // mmio context
+ xbit $r10 $flags $p1 // direction
+ or $r10 6 // first, last
+ mov $r11 0 // base = 0
+ ld b32 $r12 D[$r0 + #hub_mmio_list_head]
+ ld b32 $r13 D[$r0 + #hub_mmio_list_tail]
+ mov $r14 0 // not multi
+ call #mmctx_xfer
+
+ // wait for GPCs to all complete
+ mov $r10 8 // DONE_BAR
+ call #wait_doneo
+
+ // wait for strand xfer to complete
+ call #strand_wait
+
+ // post-op
+ bra $p1 #ctx_xfer_post
+ mov $r10 12 // DONE_UNK12
+ call #wait_donez
+ mov $r1 0xa10
+ shl b32 $r1 6
+ mov $r2 5
+ iowr I[$r1] $r2 // MEM_CMD
+ ctx_xfer_post_save_wait:
+ iord $r2 I[$r1]
+ or $r2 $r2
+ bra ne #ctx_xfer_post_save_wait
+
+ bra $p2 #ctx_xfer_done
+ ctx_xfer_post:
+ mov $r15 2
+ call #ctx_4170s
+ clear b32 $r15
+ call #ctx_86c
+ call #strand_post
+ call #ctx_4170w
+ clear b32 $r15
+ call #ctx_4170s
+
+ bra not $p1 #ctx_xfer_no_post_mmio
+ ld b32 $r1 D[$r0 + #chan_mmio_count]
+ or $r1 $r1
+ bra e #ctx_xfer_no_post_mmio
+ call #ctx_mmio_exec
+
+ ctx_xfer_no_post_mmio:
+#ifdef NVGF
+ call #ctx_4160c
+#endif
+
+ ctx_xfer_done:
+ ret
+#endif
-/* fuc microcode for nvc0 PGRAPH/HUB
- *
- * Copyright 2011 Red Hat Inc.
+/*
+ * Copyright 2013 Red Hat Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
- * Authors: Ben Skeggs
+ * Authors: Ben Skeggs <bskeggs@redhat.com>
*/
-/* To build:
- * m4 hubnvc0.fuc | envyas -a -w -m fuc -V fuc3 -o hubnvc0.fuc.h
- */
+#define NVGF
+#include "macros.fuc"
.section #nvc0_grhub_data
-include(`nvc0.fuc')
-gpc_count: .b32 0
-rop_count: .b32 0
-cmd_queue: queue_init
-hub_mmio_list_head: .b32 0
-hub_mmio_list_tail: .b32 0
-
-ctx_current: .b32 0
-
-.align 256
-chan_data:
-chan_mmio_count: .b32 0
-chan_mmio_address: .b32 0
+#define INCLUDE_DATA
+#include "com.fuc"
+#include "hub.fuc"
-.align 256
-xfer_data: .b32 0
-
-.align 256
chipsets:
.b8 0xc0 0 0 0
.b16 #nvc0_hub_mmio_head
nvc1_hub_mmio_tail:
mmctx_data(0x4064bc, 3)
nvd9_hub_mmio_tail:
+#undef INCLUDE_DATA
.section #nvc0_grhub_code
+#define INCLUDE_CODE
bra #init
-define(`include_code')
-include(`nvc0.fuc')
-
-// reports an exception to the host
-//
-// In: $r15 error code (see nvc0.fuc)
-//
-error:
- push $r14
- mov $r14 0x814
- shl b32 $r14 6
- iowr I[$r14 + 0x000] $r15 // CC_SCRATCH[5] = error code
- mov $r14 0xc1c
- shl b32 $r14 6
- mov $r15 1
- iowr I[$r14 + 0x000] $r15 // INTR_UP_SET
- pop $r14
- ret
-
-// HUB fuc initialisation, executed by triggering ucode start, will
-// fall through to main loop after completion.
-//
-// Input:
-// CC_SCRATCH[0]: chipset (PMC_BOOT_0 read returns 0x0bad0bad... sigh)
-//
-// Output:
-// CC_SCRATCH[0]:
-// 31:31: set to signal completion
-// CC_SCRATCH[1]:
-// 31:0: total PGRAPH context size
-//
-init:
- clear b32 $r0
- mov $sp $r0
- mov $xdbase $r0
-
- // enable fifo access
- mov $r1 0x1200
- mov $r2 2
- iowr I[$r1 + 0x000] $r2 // FIFO_ENABLE
-
- // setup i0 handler, and route all interrupts to it
- mov $r1 #ih
- mov $iv0 $r1
- mov $r1 0x400
- iowr I[$r1 + 0x300] $r0 // INTR_DISPATCH
-
- // route HUB_CHANNEL_SWITCH to fuc interrupt 8
- mov $r3 0x404
- shl b32 $r3 6
- mov $r2 0x2003 // { HUB_CHANNEL_SWITCH, ZERO } -> intr 8
- iowr I[$r3 + 0x000] $r2
-
- // not sure what these are, route them because NVIDIA does, and
- // the IRQ handler will signal the host if we ever get one.. we
- // may find out if/why we need to handle these if so..
- //
- mov $r2 0x2004
- iowr I[$r3 + 0x004] $r2 // { 0x04, ZERO } -> intr 9
- mov $r2 0x200b
- iowr I[$r3 + 0x008] $r2 // { 0x0b, ZERO } -> intr 10
- mov $r2 0x200c
- iowr I[$r3 + 0x01c] $r2 // { 0x0c, ZERO } -> intr 15
-
- // enable all INTR_UP interrupts
- mov $r2 0xc24
- shl b32 $r2 6
- not b32 $r3 $r0
- iowr I[$r2] $r3
-
- // enable fifo, ctxsw, 9, 10, 15 interrupts
- mov $r2 -0x78fc // 0x8704
- sethi $r2 0
- iowr I[$r1 + 0x000] $r2 // INTR_EN_SET
-
- // fifo level triggered, rest edge
- sub b32 $r1 0x100
- mov $r2 4
- iowr I[$r1] $r2
-
- // enable interrupts
- bset $flags ie0
-
- // fetch enabled GPC/ROP counts
- mov $r14 -0x69fc // 0x409604
- sethi $r14 0x400000
- call #nv_rd32
- extr $r1 $r15 16:20
- st b32 D[$r0 + #rop_count] $r1
- and $r15 0x1f
- st b32 D[$r0 + #gpc_count] $r15
-
- // set BAR_REQMASK to GPC mask
- mov $r1 1
- shl b32 $r1 $r15
- sub b32 $r1 1
- mov $r2 0x40c
- shl b32 $r2 6
- iowr I[$r2 + 0x000] $r1
- iowr I[$r2 + 0x100] $r1
-
- // find context data for this chipset
- mov $r2 0x800
- shl b32 $r2 6
- iord $r2 I[$r2 + 0x000] // CC_SCRATCH[0]
- mov $r15 #chipsets - 8
- init_find_chipset:
- add b32 $r15 8
- ld b32 $r3 D[$r15 + 0x00]
- cmpu b32 $r3 $r2
- bra e #init_context
- cmpu b32 $r3 0
- bra ne #init_find_chipset
- // unknown chipset
- ret
-
- // context size calculation, reserve first 256 bytes for use by fuc
- init_context:
- mov $r1 256
-
- // calculate size of mmio context data
- ld b16 $r14 D[$r15 + 4]
- ld b16 $r15 D[$r15 + 6]
- sethi $r14 0
- st b32 D[$r0 + #hub_mmio_list_head] $r14
- st b32 D[$r0 + #hub_mmio_list_tail] $r15
- call #mmctx_size
-
- // set mmctx base addresses now so we don't have to do it later,
- // they don't (currently) ever change
- mov $r3 0x700
- shl b32 $r3 6
- shr b32 $r4 $r1 8
- iowr I[$r3 + 0x000] $r4 // MMCTX_SAVE_SWBASE
- iowr I[$r3 + 0x100] $r4 // MMCTX_LOAD_SWBASE
- add b32 $r3 0x1300
- add b32 $r1 $r15
- shr b32 $r15 2
- iowr I[$r3 + 0x000] $r15 // MMCTX_LOAD_COUNT, wtf for?!?
-
- // strands, base offset needs to be aligned to 256 bytes
- shr b32 $r1 8
- add b32 $r1 1
- shl b32 $r1 8
- mov b32 $r15 $r1
- call #strand_ctx_init
- add b32 $r1 $r15
-
- // initialise each GPC in sequence by passing in the offset of its
- // context data in GPCn_CC_SCRATCH[1], and starting its FUC (which
- // has previously been uploaded by the host) running.
- //
- // the GPC fuc init sequence will set GPCn_CC_SCRATCH[0] bit 31
- // when it has completed, and return the size of its context data
- // in GPCn_CC_SCRATCH[1]
- //
- ld b32 $r3 D[$r0 + #gpc_count]
- mov $r4 0x2000
- sethi $r4 0x500000
- init_gpc:
- // setup, and start GPC ucode running
- add b32 $r14 $r4 0x804
- mov b32 $r15 $r1
- call #nv_wr32 // CC_SCRATCH[1] = ctx offset
- add b32 $r14 $r4 0x800
- mov b32 $r15 $r2
- call #nv_wr32 // CC_SCRATCH[0] = chipset
- add b32 $r14 $r4 0x10c
- clear b32 $r15
- call #nv_wr32
- add b32 $r14 $r4 0x104
- call #nv_wr32 // ENTRY
- add b32 $r14 $r4 0x100
- mov $r15 2 // CTRL_START_TRIGGER
- call #nv_wr32 // CTRL
-
- // wait for it to complete, and adjust context size
- add b32 $r14 $r4 0x800
- init_gpc_wait:
- call #nv_rd32
- xbit $r15 $r15 31
- bra e #init_gpc_wait
- add b32 $r14 $r4 0x804
- call #nv_rd32
- add b32 $r1 $r15
-
- // next!
- add b32 $r4 0x8000
- sub b32 $r3 1
- bra ne #init_gpc
-
- // save context size, and tell host we're ready
- mov $r2 0x800
- shl b32 $r2 6
- iowr I[$r2 + 0x100] $r1 // CC_SCRATCH[1] = context size
- add b32 $r2 0x800
- clear b32 $r1
- bset $r1 31
- iowr I[$r2 + 0x000] $r1 // CC_SCRATCH[0] |= 0x80000000
-
-// Main program loop, very simple, sleeps until woken up by the interrupt
-// handler, pulls a command from the queue and executes its handler
-//
-main:
- // sleep until we have something to do
- bset $flags $p0
- sleep $p0
- mov $r13 #cmd_queue
- call #queue_get
- bra $p1 #main
-
- // context switch, requested by GPU?
- cmpu b32 $r14 0x4001
- bra ne #main_not_ctx_switch
- trace_set(T_AUTO)
- mov $r1 0xb00
- shl b32 $r1 6
- iord $r2 I[$r1 + 0x100] // CHAN_NEXT
- iord $r1 I[$r1 + 0x000] // CHAN_CUR
-
- xbit $r3 $r1 31
- bra e #chsw_no_prev
- xbit $r3 $r2 31
- bra e #chsw_prev_no_next
- push $r2
- mov b32 $r2 $r1
- trace_set(T_SAVE)
- bclr $flags $p1
- bset $flags $p2
- call #ctx_xfer
- trace_clr(T_SAVE);
- pop $r2
- trace_set(T_LOAD);
- bset $flags $p1
- call #ctx_xfer
- trace_clr(T_LOAD);
- bra #chsw_done
- chsw_prev_no_next:
- push $r2
- mov b32 $r2 $r1
- bclr $flags $p1
- bclr $flags $p2
- call #ctx_xfer
- pop $r2
- mov $r1 0xb00
- shl b32 $r1 6
- iowr I[$r1] $r2
- bra #chsw_done
- chsw_no_prev:
- xbit $r3 $r2 31
- bra e #chsw_done
- bset $flags $p1
- bclr $flags $p2
- call #ctx_xfer
-
- // ack the context switch request
- chsw_done:
- mov $r1 0xb0c
- shl b32 $r1 6
- mov $r2 1
- iowr I[$r1 + 0x000] $r2 // 0x409b0c
- trace_clr(T_AUTO)
- bra #main
-
- // request to set current channel? (*not* a context switch)
- main_not_ctx_switch:
- cmpu b32 $r14 0x0001
- bra ne #main_not_ctx_chan
- mov b32 $r2 $r15
- call #ctx_chan
- bra #main_done
-
- // request to store current channel context?
- main_not_ctx_chan:
- cmpu b32 $r14 0x0002
- bra ne #main_not_ctx_save
- trace_set(T_SAVE)
- bclr $flags $p1
- bclr $flags $p2
- call #ctx_xfer
- trace_clr(T_SAVE)
- bra #main_done
-
- main_not_ctx_save:
- shl b32 $r15 $r14 16
- or $r15 E_BAD_COMMAND
- call #error
- bra #main
-
- main_done:
- mov $r1 0x820
- shl b32 $r1 6
- clear b32 $r2
- bset $r2 31
- iowr I[$r1 + 0x000] $r2 // CC_SCRATCH[0] |= 0x80000000
- bra #main
-
-// interrupt handler
-ih:
- push $r8
- mov $r8 $flags
- push $r8
- push $r9
- push $r10
- push $r11
- push $r13
- push $r14
- push $r15
-
- // incoming fifo command?
- iord $r10 I[$r0 + 0x200] // INTR
- and $r11 $r10 0x00000004
- bra e #ih_no_fifo
- // queue incoming fifo command for later processing
- mov $r11 0x1900
- mov $r13 #cmd_queue
- iord $r14 I[$r11 + 0x100] // FIFO_CMD
- iord $r15 I[$r11 + 0x000] // FIFO_DATA
- call #queue_put
- add b32 $r11 0x400
- mov $r14 1
- iowr I[$r11 + 0x000] $r14 // FIFO_ACK
-
- // context switch request?
- ih_no_fifo:
- and $r11 $r10 0x00000100
- bra e #ih_no_ctxsw
- // enqueue a context switch for later processing
- mov $r13 #cmd_queue
- mov $r14 0x4001
- call #queue_put
-
- // anything we didn't handle, bring it to the host's attention
- ih_no_ctxsw:
- mov $r11 0x104
- not b32 $r11
- and $r11 $r10 $r11
- bra e #ih_no_other
- mov $r10 0xc1c
- shl b32 $r10 6
- iowr I[$r10] $r11 // INTR_UP_SET
-
- // ack, and wake up main()
- ih_no_other:
- iowr I[$r0 + 0x100] $r10 // INTR_ACK
-
- pop $r15
- pop $r14
- pop $r13
- pop $r11
- pop $r10
- pop $r9
- pop $r8
- mov $flags $r8
- pop $r8
- bclr $flags $p0
- iret
-
-// Not real sure, but, MEM_CMD 7 will hang forever if this isn't done
-ctx_4160s:
- mov $r14 0x4160
- sethi $r14 0x400000
- mov $r15 1
- call #nv_wr32
- ctx_4160s_wait:
- call #nv_rd32
- xbit $r15 $r15 4
- bra e #ctx_4160s_wait
- ret
-
-// Without clearing again at end of xfer, some things cause PGRAPH
-// to hang with STATUS=0x00000007 until it's cleared.. fbcon can
-// still function with it set however...
-ctx_4160c:
- mov $r14 0x4160
- sethi $r14 0x400000
- clear b32 $r15
- call #nv_wr32
- ret
-
-// Again, not real sure
-//
-// In: $r15 value to set 0x404170 to
-//
-ctx_4170s:
- mov $r14 0x4170
- sethi $r14 0x400000
- or $r15 0x10
- call #nv_wr32
- ret
-
-// Waits for a ctx_4170s() call to complete
-//
-ctx_4170w:
- mov $r14 0x4170
- sethi $r14 0x400000
- call #nv_rd32
- and $r15 0x10
- bra ne #ctx_4170w
- ret
-
-// Disables various things, waits a bit, and re-enables them..
-//
-// Not sure how exactly this helps, perhaps "ENABLE" is not such a
-// good description for the bits we turn off? Anyways, without this,
-// funny things happen.
-//
-ctx_redswitch:
- mov $r14 0x614
- shl b32 $r14 6
- mov $r15 0x270
- iowr I[$r14] $r15 // HUB_RED_SWITCH = ENABLE_GPC, POWER_ALL
- mov $r15 8
- ctx_redswitch_delay:
- sub b32 $r15 1
- bra ne #ctx_redswitch_delay
- mov $r15 0x770
- iowr I[$r14] $r15 // HUB_RED_SWITCH = ENABLE_ALL, POWER_ALL
- ret
-
-// Not a clue what this is for, except that unless the value is 0x10, the
-// strand context is saved (and presumably restored) incorrectly..
-//
-// In: $r15 value to set to (0x00/0x10 are used)
-//
-ctx_86c:
- mov $r14 0x86c
- shl b32 $r14 6
- iowr I[$r14] $r15 // HUB(0x86c) = val
- mov $r14 -0x75ec
- sethi $r14 0x400000
- call #nv_wr32 // ROP(0xa14) = val
- mov $r14 -0x5794
- sethi $r14 0x410000
- call #nv_wr32 // GPC(0x86c) = val
- ret
-
-// ctx_load - load's a channel's ctxctl data, and selects its vm
-//
-// In: $r2 channel address
-//
-ctx_load:
- trace_set(T_CHAN)
-
- // switch to channel, somewhat magic in parts..
- mov $r10 12 // DONE_UNK12
- call #wait_donez
- mov $r1 0xa24
- shl b32 $r1 6
- iowr I[$r1 + 0x000] $r0 // 0x409a24
- mov $r3 0xb00
- shl b32 $r3 6
- iowr I[$r3 + 0x100] $r2 // CHAN_NEXT
- mov $r1 0xa0c
- shl b32 $r1 6
- mov $r4 7
- iowr I[$r1 + 0x000] $r2 // MEM_CHAN
- iowr I[$r1 + 0x100] $r4 // MEM_CMD
- ctx_chan_wait_0:
- iord $r4 I[$r1 + 0x100]
- and $r4 0x1f
- bra ne #ctx_chan_wait_0
- iowr I[$r3 + 0x000] $r2 // CHAN_CUR
-
- // load channel header, fetch PGRAPH context pointer
- mov $xtargets $r0
- bclr $r2 31
- shl b32 $r2 4
- add b32 $r2 2
-
- trace_set(T_LCHAN)
- mov $r1 0xa04
- shl b32 $r1 6
- iowr I[$r1 + 0x000] $r2 // MEM_BASE
- mov $r1 0xa20
- shl b32 $r1 6
- mov $r2 0x0002
- sethi $r2 0x80000000
- iowr I[$r1 + 0x000] $r2 // MEM_TARGET = vram
- mov $r1 0x10 // chan + 0x0210
- mov $r2 #xfer_data
- sethi $r2 0x00020000 // 16 bytes
- xdld $r1 $r2
- xdwait
- trace_clr(T_LCHAN)
-
- // update current context
- ld b32 $r1 D[$r0 + #xfer_data + 4]
- shl b32 $r1 24
- ld b32 $r2 D[$r0 + #xfer_data + 0]
- shr b32 $r2 8
- or $r1 $r2
- st b32 D[$r0 + #ctx_current] $r1
-
- // set transfer base to start of context, and fetch context header
- trace_set(T_LCTXH)
- mov $r2 0xa04
- shl b32 $r2 6
- iowr I[$r2 + 0x000] $r1 // MEM_BASE
- mov $r2 1
- mov $r1 0xa20
- shl b32 $r1 6
- iowr I[$r1 + 0x000] $r2 // MEM_TARGET = vm
- mov $r1 #chan_data
- sethi $r1 0x00060000 // 256 bytes
- xdld $r0 $r1
- xdwait
- trace_clr(T_LCTXH)
-
- trace_clr(T_CHAN)
- ret
-
-// ctx_chan - handler for HUB_SET_CHAN command, will set a channel as
-// the active channel for ctxctl, but not actually transfer
-// any context data. intended for use only during initial
-// context construction.
-//
-// In: $r2 channel address
-//
-ctx_chan:
- call #ctx_4160s
- call #ctx_load
- mov $r10 12 // DONE_UNK12
- call #wait_donez
- mov $r1 0xa10
- shl b32 $r1 6
- mov $r2 5
- iowr I[$r1 + 0x000] $r2 // MEM_CMD = 5 (???)
- ctx_chan_wait:
- iord $r2 I[$r1 + 0x000]
- or $r2 $r2
- bra ne #ctx_chan_wait
- call #ctx_4160c
- ret
-
-// Execute per-context state overrides list
-//
-// Only executed on the first load of a channel. Might want to look into
-// removing this and having the host directly modify the channel's context
-// to change this state... The nouveau DRM already builds this list as
-// it's definitely needed for NVIDIA's, so we may as well use it for now
-//
-// Input: $r1 mmio list length
-//
-ctx_mmio_exec:
- // set transfer base to be the mmio list
- ld b32 $r3 D[$r0 + #chan_mmio_address]
- mov $r2 0xa04
- shl b32 $r2 6
- iowr I[$r2 + 0x000] $r3 // MEM_BASE
-
- clear b32 $r3
- ctx_mmio_loop:
- // fetch next 256 bytes of mmio list if necessary
- and $r4 $r3 0xff
- bra ne #ctx_mmio_pull
- mov $r5 #xfer_data
- sethi $r5 0x00060000 // 256 bytes
- xdld $r3 $r5
- xdwait
-
- // execute a single list entry
- ctx_mmio_pull:
- ld b32 $r14 D[$r4 + #xfer_data + 0x00]
- ld b32 $r15 D[$r4 + #xfer_data + 0x04]
- call #nv_wr32
-
- // next!
- add b32 $r3 8
- sub b32 $r1 1
- bra ne #ctx_mmio_loop
-
- // set transfer base back to the current context
- ctx_mmio_done:
- ld b32 $r3 D[$r0 + #ctx_current]
- iowr I[$r2 + 0x000] $r3 // MEM_BASE
-
- // disable the mmio list now, we don't need/want to execute it again
- st b32 D[$r0 + #chan_mmio_count] $r0
- mov $r1 #chan_data
- sethi $r1 0x00060000 // 256 bytes
- xdst $r0 $r1
- xdwait
- ret
-
-// Transfer HUB context data between GPU and storage area
-//
-// In: $r2 channel address
-// $p1 clear on save, set on load
-// $p2 set if opposite direction done/will be done, so:
-// on save it means: "a load will follow this save"
-// on load it means: "a save preceeded this load"
-//
-ctx_xfer:
- // according to mwk, some kind of wait for idle
- mov $r15 0xc00
- shl b32 $r15 6
- mov $r14 4
- iowr I[$r15 + 0x200] $r14
- ctx_xfer_idle:
- iord $r14 I[$r15 + 0x000]
- and $r14 0x2000
- bra ne #ctx_xfer_idle
-
- bra not $p1 #ctx_xfer_pre
- bra $p2 #ctx_xfer_pre_load
- ctx_xfer_pre:
- mov $r15 0x10
- call #ctx_86c
- call #ctx_4160s
- bra not $p1 #ctx_xfer_exec
-
- ctx_xfer_pre_load:
- mov $r15 2
- call #ctx_4170s
- call #ctx_4170w
- call #ctx_redswitch
- clear b32 $r15
- call #ctx_4170s
- call #ctx_load
-
- // fetch context pointer, and initiate xfer on all GPCs
- ctx_xfer_exec:
- ld b32 $r1 D[$r0 + #ctx_current]
- mov $r2 0x414
- shl b32 $r2 6
- iowr I[$r2 + 0x000] $r0 // BAR_STATUS = reset
- mov $r14 -0x5b00
- sethi $r14 0x410000
- mov b32 $r15 $r1
- call #nv_wr32 // GPC_BCAST_WRCMD_DATA = ctx pointer
- add b32 $r14 4
- xbit $r15 $flags $p1
- xbit $r2 $flags $p2
- shl b32 $r2 1
- or $r15 $r2
- call #nv_wr32 // GPC_BCAST_WRCMD_CMD = GPC_XFER(type)
-
- // strands
- mov $r1 0x4afc
- sethi $r1 0x20000
- mov $r2 0xc
- iowr I[$r1] $r2 // STRAND_CMD(0x3f) = 0x0c
- call #strand_wait
- mov $r2 0x47fc
- sethi $r2 0x20000
- iowr I[$r2] $r0 // STRAND_FIRST_GENE(0x3f) = 0x00
- xbit $r2 $flags $p1
- add b32 $r2 3
- iowr I[$r1] $r2 // STRAND_CMD(0x3f) = 0x03/0x04 (SAVE/LOAD)
-
- // mmio context
- xbit $r10 $flags $p1 // direction
- or $r10 6 // first, last
- mov $r11 0 // base = 0
- ld b32 $r12 D[$r0 + #hub_mmio_list_head]
- ld b32 $r13 D[$r0 + #hub_mmio_list_tail]
- mov $r14 0 // not multi
- call #mmctx_xfer
-
- // wait for GPCs to all complete
- mov $r10 8 // DONE_BAR
- call #wait_doneo
-
- // wait for strand xfer to complete
- call #strand_wait
-
- // post-op
- bra $p1 #ctx_xfer_post
- mov $r10 12 // DONE_UNK12
- call #wait_donez
- mov $r1 0xa10
- shl b32 $r1 6
- mov $r2 5
- iowr I[$r1] $r2 // MEM_CMD
- ctx_xfer_post_save_wait:
- iord $r2 I[$r1]
- or $r2 $r2
- bra ne #ctx_xfer_post_save_wait
-
- bra $p2 #ctx_xfer_done
- ctx_xfer_post:
- mov $r15 2
- call #ctx_4170s
- clear b32 $r15
- call #ctx_86c
- call #strand_post
- call #ctx_4170w
- clear b32 $r15
- call #ctx_4170s
-
- bra not $p1 #ctx_xfer_no_post_mmio
- ld b32 $r1 D[$r0 + #chan_mmio_count]
- or $r1 $r1
- bra e #ctx_xfer_no_post_mmio
- call #ctx_mmio_exec
-
- ctx_xfer_no_post_mmio:
- call #ctx_4160c
-
- ctx_xfer_done:
- ret
-
+#include "com.fuc"
+#include "hub.fuc"
.align 256
+#undef INCLUDE_CODE
0x0089d000,
0x081887f1,
0xd00684b6,
-/* 0x00e2: wait_done_wait_donez */
+/* 0x00e2: wait_donez_ne */
0x87f1008a,
0x84b60400,
0x0088cf06,
0x87f10089,
0x84b60818,
0x008ad006,
-/* 0x011c: wait_done_wait_doneo */
+/* 0x011c: wait_doneo_e */
0x040087f1,
0xcf0684b6,
0x8aff0088,
-/* fuc microcode for nve0 PGRAPH/HUB
- *
- * Copyright 2011 Red Hat Inc.
+/*
+ * Copyright 2013 Red Hat Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
- * Authors: Ben Skeggs
+ * Authors: Ben Skeggs <bskeggs@redhat.com>
*/
-/* To build:
- * m4 nve0_grhub.fuc | envyas -a -w -m fuc -V nva3 -o nve0_grhub.fuc.h
- */
+#define NVGK
+#include "macros.fuc"
.section #nve0_grhub_data
-include(`nve0.fuc')
-gpc_count: .b32 0
-rop_count: .b32 0
-cmd_queue: queue_init
-hub_mmio_list_head: .b32 0
-hub_mmio_list_tail: .b32 0
-
-ctx_current: .b32 0
-
-.align 256
-chan_data:
-chan_mmio_count: .b32 0
-chan_mmio_address: .b32 0
+#define INCLUDE_DATA
+#include "com.fuc"
+#include "hub.fuc"
-.align 256
-xfer_data: .b32 0
-
-.align 256
chipsets:
.b8 0xe4 0 0 0
.b16 #nve4_hub_mmio_head
mmctx_data(0x408900, 3)
mmctx_data(0x408980, 1)
nvf0_hub_mmio_tail:
+#undef INCLUDE_DATA
.section #nve0_grhub_code
+#define INCLUDE_CODE
bra #init
-define(`include_code')
-include(`nve0.fuc')
-
-// reports an exception to the host
-//
-// In: $r15 error code (see nve0.fuc)
-//
-error:
- push $r14
- mov $r14 0x814
- shl b32 $r14 6
- iowr I[$r14 + 0x000] $r15 // CC_SCRATCH[5] = error code
- mov $r14 0xc1c
- shl b32 $r14 6
- mov $r15 1
- iowr I[$r14 + 0x000] $r15 // INTR_UP_SET
- pop $r14
- ret
-
-// HUB fuc initialisation, executed by triggering ucode start, will
-// fall through to main loop after completion.
-//
-// Input:
-// CC_SCRATCH[0]: chipset (PMC_BOOT_0 read returns 0x0bad0bad... sigh)
-//
-// Output:
-// CC_SCRATCH[0]:
-// 31:31: set to signal completion
-// CC_SCRATCH[1]:
-// 31:0: total PGRAPH context size
-//
-init:
- clear b32 $r0
- mov $sp $r0
- mov $xdbase $r0
-
- // enable fifo access
- mov $r1 0x1200
- mov $r2 2
- iowr I[$r1 + 0x000] $r2 // FIFO_ENABLE
-
- // setup i0 handler, and route all interrupts to it
- mov $r1 #ih
- mov $iv0 $r1
- mov $r1 0x400
- iowr I[$r1 + 0x300] $r0 // INTR_DISPATCH
-
- // route HUB_CHANNEL_SWITCH to fuc interrupt 8
- mov $r3 0x404
- shl b32 $r3 6
- mov $r2 0x2003 // { HUB_CHANNEL_SWITCH, ZERO } -> intr 8
- iowr I[$r3 + 0x000] $r2
-
- // not sure what these are, route them because NVIDIA does, and
- // the IRQ handler will signal the host if we ever get one.. we
- // may find out if/why we need to handle these if so..
- //
- mov $r2 0x2004
- iowr I[$r3 + 0x004] $r2 // { 0x04, ZERO } -> intr 9
- mov $r2 0x200b
- iowr I[$r3 + 0x008] $r2 // { 0x0b, ZERO } -> intr 10
- mov $r2 0x200c
- iowr I[$r3 + 0x01c] $r2 // { 0x0c, ZERO } -> intr 15
-
- // enable all INTR_UP interrupts
- mov $r2 0xc24
- shl b32 $r2 6
- not b32 $r3 $r0
- iowr I[$r2] $r3
-
- // enable fifo, ctxsw, 9, 10, 15 interrupts
- mov $r2 -0x78fc // 0x8704
- sethi $r2 0
- iowr I[$r1 + 0x000] $r2 // INTR_EN_SET
-
- // fifo level triggered, rest edge
- sub b32 $r1 0x100
- mov $r2 4
- iowr I[$r1] $r2
-
- // enable interrupts
- bset $flags ie0
-
- // fetch enabled GPC/ROP counts
- mov $r14 -0x69fc // 0x409604
- sethi $r14 0x400000
- call #nv_rd32
- extr $r1 $r15 16:20
- st b32 D[$r0 + #rop_count] $r1
- and $r15 0x1f
- st b32 D[$r0 + #gpc_count] $r15
-
- // set BAR_REQMASK to GPC mask
- mov $r1 1
- shl b32 $r1 $r15
- sub b32 $r1 1
- mov $r2 0x40c
- shl b32 $r2 6
- iowr I[$r2 + 0x000] $r1
- iowr I[$r2 + 0x100] $r1
-
- // find context data for this chipset
- mov $r2 0x800
- shl b32 $r2 6
- iord $r2 I[$r2 + 0x000] // CC_SCRATCH[0]
- mov $r15 #chipsets - 8
- init_find_chipset:
- add b32 $r15 8
- ld b32 $r3 D[$r15 + 0x00]
- cmpu b32 $r3 $r2
- bra e #init_context
- cmpu b32 $r3 0
- bra ne #init_find_chipset
- // unknown chipset
- ret
-
- // context size calculation, reserve first 256 bytes for use by fuc
- init_context:
- mov $r1 256
-
- // calculate size of mmio context data
- ld b16 $r14 D[$r15 + 4]
- ld b16 $r15 D[$r15 + 6]
- sethi $r14 0
- st b32 D[$r0 + #hub_mmio_list_head] $r14
- st b32 D[$r0 + #hub_mmio_list_tail] $r15
- call #mmctx_size
-
- // set mmctx base addresses now so we don't have to do it later,
- // they don't (currently) ever change
- mov $r3 0x700
- shl b32 $r3 6
- shr b32 $r4 $r1 8
- iowr I[$r3 + 0x000] $r4 // MMCTX_SAVE_SWBASE
- iowr I[$r3 + 0x100] $r4 // MMCTX_LOAD_SWBASE
- add b32 $r3 0x1300
- add b32 $r1 $r15
- shr b32 $r15 2
- iowr I[$r3 + 0x000] $r15 // MMCTX_LOAD_COUNT, wtf for?!?
-
- // strands, base offset needs to be aligned to 256 bytes
- shr b32 $r1 8
- add b32 $r1 1
- shl b32 $r1 8
- mov b32 $r15 $r1
- call #strand_ctx_init
- add b32 $r1 $r15
-
- // initialise each GPC in sequence by passing in the offset of its
- // context data in GPCn_CC_SCRATCH[1], and starting its FUC (which
- // has previously been uploaded by the host) running.
- //
- // the GPC fuc init sequence will set GPCn_CC_SCRATCH[0] bit 31
- // when it has completed, and return the size of its context data
- // in GPCn_CC_SCRATCH[1]
- //
- ld b32 $r3 D[$r0 + #gpc_count]
- mov $r4 0x2000
- sethi $r4 0x500000
- init_gpc:
- // setup, and start GPC ucode running
- add b32 $r14 $r4 0x804
- mov b32 $r15 $r1
- call #nv_wr32 // CC_SCRATCH[1] = ctx offset
- add b32 $r14 $r4 0x800
- mov b32 $r15 $r2
- call #nv_wr32 // CC_SCRATCH[0] = chipset
- add b32 $r14 $r4 0x10c
- clear b32 $r15
- call #nv_wr32
- add b32 $r14 $r4 0x104
- call #nv_wr32 // ENTRY
- add b32 $r14 $r4 0x100
- mov $r15 2 // CTRL_START_TRIGGER
- call #nv_wr32 // CTRL
-
- // wait for it to complete, and adjust context size
- add b32 $r14 $r4 0x800
- init_gpc_wait:
- call #nv_rd32
- xbit $r15 $r15 31
- bra e #init_gpc_wait
- add b32 $r14 $r4 0x804
- call #nv_rd32
- add b32 $r1 $r15
-
- // next!
- add b32 $r4 0x8000
- sub b32 $r3 1
- bra ne #init_gpc
-
- // save context size, and tell host we're ready
- mov $r2 0x800
- shl b32 $r2 6
- iowr I[$r2 + 0x100] $r1 // CC_SCRATCH[1] = context size
- add b32 $r2 0x800
- clear b32 $r1
- bset $r1 31
- iowr I[$r2 + 0x000] $r1 // CC_SCRATCH[0] |= 0x80000000
-
-// Main program loop, very simple, sleeps until woken up by the interrupt
-// handler, pulls a command from the queue and executes its handler
-//
-main:
- // sleep until we have something to do
- bset $flags $p0
- sleep $p0
- mov $r13 #cmd_queue
- call #queue_get
- bra $p1 #main
-
- // context switch, requested by GPU?
- cmpu b32 $r14 0x4001
- bra ne #main_not_ctx_switch
- trace_set(T_AUTO)
- mov $r1 0xb00
- shl b32 $r1 6
- iord $r2 I[$r1 + 0x100] // CHAN_NEXT
- iord $r1 I[$r1 + 0x000] // CHAN_CUR
-
- xbit $r3 $r1 31
- bra e #chsw_no_prev
- xbit $r3 $r2 31
- bra e #chsw_prev_no_next
- push $r2
- mov b32 $r2 $r1
- trace_set(T_SAVE)
- bclr $flags $p1
- bset $flags $p2
- call #ctx_xfer
- trace_clr(T_SAVE);
- pop $r2
- trace_set(T_LOAD);
- bset $flags $p1
- call #ctx_xfer
- trace_clr(T_LOAD);
- bra #chsw_done
- chsw_prev_no_next:
- push $r2
- mov b32 $r2 $r1
- bclr $flags $p1
- bclr $flags $p2
- call #ctx_xfer
- pop $r2
- mov $r1 0xb00
- shl b32 $r1 6
- iowr I[$r1] $r2
- bra #chsw_done
- chsw_no_prev:
- xbit $r3 $r2 31
- bra e #chsw_done
- bset $flags $p1
- bclr $flags $p2
- call #ctx_xfer
-
- // ack the context switch request
- chsw_done:
- mov $r1 0xb0c
- shl b32 $r1 6
- mov $r2 1
- iowr I[$r1 + 0x000] $r2 // 0x409b0c
- trace_clr(T_AUTO)
- bra #main
-
- // request to set current channel? (*not* a context switch)
- main_not_ctx_switch:
- cmpu b32 $r14 0x0001
- bra ne #main_not_ctx_chan
- mov b32 $r2 $r15
- call #ctx_chan
- bra #main_done
-
- // request to store current channel context?
- main_not_ctx_chan:
- cmpu b32 $r14 0x0002
- bra ne #main_not_ctx_save
- trace_set(T_SAVE)
- bclr $flags $p1
- bclr $flags $p2
- call #ctx_xfer
- trace_clr(T_SAVE)
- bra #main_done
-
- main_not_ctx_save:
- shl b32 $r15 $r14 16
- or $r15 E_BAD_COMMAND
- call #error
- bra #main
-
- main_done:
- mov $r1 0x820
- shl b32 $r1 6
- clear b32 $r2
- bset $r2 31
- iowr I[$r1 + 0x000] $r2 // CC_SCRATCH[0] |= 0x80000000
- bra #main
-
-// interrupt handler
-ih:
- push $r8
- mov $r8 $flags
- push $r8
- push $r9
- push $r10
- push $r11
- push $r13
- push $r14
- push $r15
-
- // incoming fifo command?
- iord $r10 I[$r0 + 0x200] // INTR
- and $r11 $r10 0x00000004
- bra e #ih_no_fifo
- // queue incoming fifo command for later processing
- mov $r11 0x1900
- mov $r13 #cmd_queue
- iord $r14 I[$r11 + 0x100] // FIFO_CMD
- iord $r15 I[$r11 + 0x000] // FIFO_DATA
- call #queue_put
- add b32 $r11 0x400
- mov $r14 1
- iowr I[$r11 + 0x000] $r14 // FIFO_ACK
-
- // context switch request?
- ih_no_fifo:
- and $r11 $r10 0x00000100
- bra e #ih_no_ctxsw
- // enqueue a context switch for later processing
- mov $r13 #cmd_queue
- mov $r14 0x4001
- call #queue_put
-
- // anything we didn't handle, bring it to the host's attention
- ih_no_ctxsw:
- mov $r11 0x104
- not b32 $r11
- and $r11 $r10 $r11
- bra e #ih_no_other
- mov $r10 0xc1c
- shl b32 $r10 6
- iowr I[$r10] $r11 // INTR_UP_SET
-
- // ack, and wake up main()
- ih_no_other:
- iowr I[$r0 + 0x100] $r10 // INTR_ACK
-
- pop $r15
- pop $r14
- pop $r13
- pop $r11
- pop $r10
- pop $r9
- pop $r8
- mov $flags $r8
- pop $r8
- bclr $flags $p0
- iret
-
-// Again, not real sure
-//
-// In: $r15 value to set 0x404170 to
-//
-ctx_4170s:
- mov $r14 0x4170
- sethi $r14 0x400000
- or $r15 0x10
- call #nv_wr32
- ret
-
-// Waits for a ctx_4170s() call to complete
-//
-ctx_4170w:
- mov $r14 0x4170
- sethi $r14 0x400000
- call #nv_rd32
- and $r15 0x10
- bra ne #ctx_4170w
- ret
-
-// Disables various things, waits a bit, and re-enables them..
-//
-// Not sure how exactly this helps, perhaps "ENABLE" is not such a
-// good description for the bits we turn off? Anyways, without this,
-// funny things happen.
-//
-ctx_redswitch:
- mov $r14 0x614
- shl b32 $r14 6
- mov $r15 0x270
- iowr I[$r14] $r15 // HUB_RED_SWITCH = ENABLE_GPC, POWER_ALL
- mov $r15 8
- ctx_redswitch_delay:
- sub b32 $r15 1
- bra ne #ctx_redswitch_delay
- mov $r15 0x770
- iowr I[$r14] $r15 // HUB_RED_SWITCH = ENABLE_ALL, POWER_ALL
- ret
-
-// Not a clue what this is for, except that unless the value is 0x10, the
-// strand context is saved (and presumably restored) incorrectly..
-//
-// In: $r15 value to set to (0x00/0x10 are used)
-//
-ctx_86c:
- mov $r14 0x86c
- shl b32 $r14 6
- iowr I[$r14] $r15 // HUB(0x86c) = val
- mov $r14 -0x75ec
- sethi $r14 0x400000
- call #nv_wr32 // ROP(0xa14) = val
- mov $r14 -0x5794
- sethi $r14 0x410000
- call #nv_wr32 // GPC(0x86c) = val
- ret
-
-// ctx_load - load's a channel's ctxctl data, and selects its vm
-//
-// In: $r2 channel address
-//
-ctx_load:
- trace_set(T_CHAN)
-
- // switch to channel, somewhat magic in parts..
- mov $r10 12 // DONE_UNK12
- call #wait_donez
- mov $r1 0xa24
- shl b32 $r1 6
- iowr I[$r1 + 0x000] $r0 // 0x409a24
- mov $r3 0xb00
- shl b32 $r3 6
- iowr I[$r3 + 0x100] $r2 // CHAN_NEXT
- mov $r1 0xa0c
- shl b32 $r1 6
- mov $r4 7
- iowr I[$r1 + 0x000] $r2 // MEM_CHAN
- iowr I[$r1 + 0x100] $r4 // MEM_CMD
- ctx_chan_wait_0:
- iord $r4 I[$r1 + 0x100]
- and $r4 0x1f
- bra ne #ctx_chan_wait_0
- iowr I[$r3 + 0x000] $r2 // CHAN_CUR
-
- // load channel header, fetch PGRAPH context pointer
- mov $xtargets $r0
- bclr $r2 31
- shl b32 $r2 4
- add b32 $r2 2
-
- trace_set(T_LCHAN)
- mov $r1 0xa04
- shl b32 $r1 6
- iowr I[$r1 + 0x000] $r2 // MEM_BASE
- mov $r1 0xa20
- shl b32 $r1 6
- mov $r2 0x0002
- sethi $r2 0x80000000
- iowr I[$r1 + 0x000] $r2 // MEM_TARGET = vram
- mov $r1 0x10 // chan + 0x0210
- mov $r2 #xfer_data
- sethi $r2 0x00020000 // 16 bytes
- xdld $r1 $r2
- xdwait
- trace_clr(T_LCHAN)
-
- // update current context
- ld b32 $r1 D[$r0 + #xfer_data + 4]
- shl b32 $r1 24
- ld b32 $r2 D[$r0 + #xfer_data + 0]
- shr b32 $r2 8
- or $r1 $r2
- st b32 D[$r0 + #ctx_current] $r1
-
- // set transfer base to start of context, and fetch context header
- trace_set(T_LCTXH)
- mov $r2 0xa04
- shl b32 $r2 6
- iowr I[$r2 + 0x000] $r1 // MEM_BASE
- mov $r2 1
- mov $r1 0xa20
- shl b32 $r1 6
- iowr I[$r1 + 0x000] $r2 // MEM_TARGET = vm
- mov $r1 #chan_data
- sethi $r1 0x00060000 // 256 bytes
- xdld $r0 $r1
- xdwait
- trace_clr(T_LCTXH)
-
- trace_clr(T_CHAN)
- ret
-
-// ctx_chan - handler for HUB_SET_CHAN command, will set a channel as
-// the active channel for ctxctl, but not actually transfer
-// any context data. intended for use only during initial
-// context construction.
-//
-// In: $r2 channel address
-//
-ctx_chan:
- call #ctx_load
- mov $r10 12 // DONE_UNK12
- call #wait_donez
- mov $r1 0xa10
- shl b32 $r1 6
- mov $r2 5
- iowr I[$r1 + 0x000] $r2 // MEM_CMD = 5 (???)
- ctx_chan_wait:
- iord $r2 I[$r1 + 0x000]
- or $r2 $r2
- bra ne #ctx_chan_wait
- ret
-
-// Execute per-context state overrides list
-//
-// Only executed on the first load of a channel. Might want to look into
-// removing this and having the host directly modify the channel's context
-// to change this state... The nouveau DRM already builds this list as
-// it's definitely needed for NVIDIA's, so we may as well use it for now
-//
-// Input: $r1 mmio list length
-//
-ctx_mmio_exec:
- // set transfer base to be the mmio list
- ld b32 $r3 D[$r0 + #chan_mmio_address]
- mov $r2 0xa04
- shl b32 $r2 6
- iowr I[$r2 + 0x000] $r3 // MEM_BASE
-
- clear b32 $r3
- ctx_mmio_loop:
- // fetch next 256 bytes of mmio list if necessary
- and $r4 $r3 0xff
- bra ne #ctx_mmio_pull
- mov $r5 #xfer_data
- sethi $r5 0x00060000 // 256 bytes
- xdld $r3 $r5
- xdwait
-
- // execute a single list entry
- ctx_mmio_pull:
- ld b32 $r14 D[$r4 + #xfer_data + 0x00]
- ld b32 $r15 D[$r4 + #xfer_data + 0x04]
- call #nv_wr32
-
- // next!
- add b32 $r3 8
- sub b32 $r1 1
- bra ne #ctx_mmio_loop
-
- // set transfer base back to the current context
- ctx_mmio_done:
- ld b32 $r3 D[$r0 + #ctx_current]
- iowr I[$r2 + 0x000] $r3 // MEM_BASE
-
- // disable the mmio list now, we don't need/want to execute it again
- st b32 D[$r0 + #chan_mmio_count] $r0
- mov $r1 #chan_data
- sethi $r1 0x00060000 // 256 bytes
- xdst $r0 $r1
- xdwait
- ret
-
-// Transfer HUB context data between GPU and storage area
-//
-// In: $r2 channel address
-// $p1 clear on save, set on load
-// $p2 set if opposite direction done/will be done, so:
-// on save it means: "a load will follow this save"
-// on load it means: "a save preceeded this load"
-//
-ctx_xfer:
- // according to mwk, some kind of wait for idle
- mov $r15 0xc00
- shl b32 $r15 6
- mov $r14 4
- iowr I[$r15 + 0x200] $r14
- ctx_xfer_idle:
- iord $r14 I[$r15 + 0x000]
- and $r14 0x2000
- bra ne #ctx_xfer_idle
-
- bra not $p1 #ctx_xfer_pre
- bra $p2 #ctx_xfer_pre_load
- ctx_xfer_pre:
- mov $r15 0x10
- call #ctx_86c
- bra not $p1 #ctx_xfer_exec
-
- ctx_xfer_pre_load:
- mov $r15 2
- call #ctx_4170s
- call #ctx_4170w
- call #ctx_redswitch
- clear b32 $r15
- call #ctx_4170s
- call #ctx_load
-
- // fetch context pointer, and initiate xfer on all GPCs
- ctx_xfer_exec:
- ld b32 $r1 D[$r0 + #ctx_current]
- mov $r2 0x414
- shl b32 $r2 6
- iowr I[$r2 + 0x000] $r0 // BAR_STATUS = reset
- mov $r14 -0x5b00
- sethi $r14 0x410000
- mov b32 $r15 $r1
- call #nv_wr32 // GPC_BCAST_WRCMD_DATA = ctx pointer
- add b32 $r14 4
- xbit $r15 $flags $p1
- xbit $r2 $flags $p2
- shl b32 $r2 1
- or $r15 $r2
- call #nv_wr32 // GPC_BCAST_WRCMD_CMD = GPC_XFER(type)
-
- // strands
- mov $r1 0x4afc
- sethi $r1 0x20000
- mov $r2 0xc
- iowr I[$r1] $r2 // STRAND_CMD(0x3f) = 0x0c
- call #strand_wait
- mov $r2 0x47fc
- sethi $r2 0x20000
- iowr I[$r2] $r0 // STRAND_FIRST_GENE(0x3f) = 0x00
- xbit $r2 $flags $p1
- add b32 $r2 3
- iowr I[$r1] $r2 // STRAND_CMD(0x3f) = 0x03/0x04 (SAVE/LOAD)
-
- // mmio context
- xbit $r10 $flags $p1 // direction
- or $r10 6 // first, last
- mov $r11 0 // base = 0
- ld b32 $r12 D[$r0 + #hub_mmio_list_head]
- ld b32 $r13 D[$r0 + #hub_mmio_list_tail]
- mov $r14 0 // not multi
- call #mmctx_xfer
-
- // wait for GPCs to all complete
- mov $r10 8 // DONE_BAR
- call #wait_doneo
-
- // wait for strand xfer to complete
- call #strand_wait
-
- // post-op
- bra $p1 #ctx_xfer_post
- mov $r10 12 // DONE_UNK12
- call #wait_donez
- mov $r1 0xa10
- shl b32 $r1 6
- mov $r2 5
- iowr I[$r1] $r2 // MEM_CMD
- ctx_xfer_post_save_wait:
- iord $r2 I[$r1]
- or $r2 $r2
- bra ne #ctx_xfer_post_save_wait
-
- bra $p2 #ctx_xfer_done
- ctx_xfer_post:
- mov $r15 2
- call #ctx_4170s
- clear b32 $r15
- call #ctx_86c
- call #strand_post
- call #ctx_4170w
- clear b32 $r15
- call #ctx_4170s
-
- bra not $p1 #ctx_xfer_no_post_mmio
- ld b32 $r1 D[$r0 + #chan_mmio_count]
- or $r1 $r1
- bra e #ctx_xfer_no_post_mmio
- call #ctx_mmio_exec
-
- ctx_xfer_no_post_mmio:
-
- ctx_xfer_done:
- ret
-
+#include "com.fuc"
+#include "hub.fuc"
.align 256
+#undef INCLUDE_CODE
0x0089d000,
0x081887f1,
0xd00684b6,
-/* 0x00e2: wait_done_wait_donez */
+/* 0x00e2: wait_donez_ne */
0x87f1008a,
0x84b60400,
0x0088cf06,
0x87f10089,
0x84b60818,
0x008ad006,
-/* 0x011c: wait_done_wait_doneo */
+/* 0x011c: wait_doneo_e */
0x040087f1,
0xcf0684b6,
0x8aff0088,
--- /dev/null
+/*
+ * Copyright 2013 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Ben Skeggs <bskeggs@redhat.com>
+ */
+
+#include "os.h"
+
+#define mmctx_data(r,c) .b32 (((c - 1) << 26) | r)
+#define queue_init .skip 72 // (2 * 4) + ((8 * 4) * 2)
+
+#define T_WAIT 0
+#define T_MMCTX 1
+#define T_STRWAIT 2
+#define T_STRINIT 3
+#define T_AUTO 4
+#define T_CHAN 5
+#define T_LOAD 6
+#define T_SAVE 7
+#define T_LCHAN 8
+#define T_LCTXH 9
+
+#define trace_set(bit) /*
+*/ mov $r8 0x83c /*
+*/ shl b32 $r8 6 /*
+*/ clear b32 $r9 /*
+*/ bset $r9 bit /*
+*/ iowr I[$r8 + 0x000] $r9
+
+#define trace_clr(bit) /*
+*/ mov $r8 0x85c /*
+*/ shl b32 $r8 6 /*
+*/ clear b32 $r9 /*
+*/ bset $r9 bit /*
+*/ iowr I[$r8 + 0x000] $r9
+++ /dev/null
-/* fuc microcode util functions for nvc0 PGRAPH
- *
- * Copyright 2011 Red Hat Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors: Ben Skeggs
- */
-
-define(`mmctx_data', `.b32 eval((($2 - 1) << 26) | $1)')
-define(`queue_init', `.skip eval((2 * 4) + ((8 * 4) * 2))')
-
-ifdef(`include_code', `
-// Error codes
-define(`E_BAD_COMMAND', 0x01)
-define(`E_CMD_OVERFLOW', 0x02)
-
-// Util macros to help with debugging ucode hangs etc
-define(`T_WAIT', 0)
-define(`T_MMCTX', 1)
-define(`T_STRWAIT', 2)
-define(`T_STRINIT', 3)
-define(`T_AUTO', 4)
-define(`T_CHAN', 5)
-define(`T_LOAD', 6)
-define(`T_SAVE', 7)
-define(`T_LCHAN', 8)
-define(`T_LCTXH', 9)
-
-define(`trace_set', `
- mov $r8 0x83c
- shl b32 $r8 6
- clear b32 $r9
- bset $r9 $1
- iowr I[$r8 + 0x000] $r9 // CC_SCRATCH[7]
-')
-
-define(`trace_clr', `
- mov $r8 0x85c
- shl b32 $r8 6
- clear b32 $r9
- bset $r9 $1
- iowr I[$r8 + 0x000] $r9 // CC_SCRATCH[7]
-')
-
-// queue_put - add request to queue
-//
-// In : $r13 queue pointer
-// $r14 command
-// $r15 data
-//
-queue_put:
- // make sure we have space..
- ld b32 $r8 D[$r13 + 0x0] // GET
- ld b32 $r9 D[$r13 + 0x4] // PUT
- xor $r8 8
- cmpu b32 $r8 $r9
- bra ne #queue_put_next
- mov $r15 E_CMD_OVERFLOW
- call #error
- ret
-
- // store cmd/data on queue
- queue_put_next:
- and $r8 $r9 7
- shl b32 $r8 3
- add b32 $r8 $r13
- add b32 $r8 8
- st b32 D[$r8 + 0x0] $r14
- st b32 D[$r8 + 0x4] $r15
-
- // update PUT
- add b32 $r9 1
- and $r9 0xf
- st b32 D[$r13 + 0x4] $r9
- ret
-
-// queue_get - fetch request from queue
-//
-// In : $r13 queue pointer
-//
-// Out: $p1 clear on success (data available)
-// $r14 command
-// $r15 data
-//
-queue_get:
- bset $flags $p1
- ld b32 $r8 D[$r13 + 0x0] // GET
- ld b32 $r9 D[$r13 + 0x4] // PUT
- cmpu b32 $r8 $r9
- bra e #queue_get_done
- // fetch first cmd/data pair
- and $r9 $r8 7
- shl b32 $r9 3
- add b32 $r9 $r13
- add b32 $r9 8
- ld b32 $r14 D[$r9 + 0x0]
- ld b32 $r15 D[$r9 + 0x4]
-
- // update GET
- add b32 $r8 1
- and $r8 0xf
- st b32 D[$r13 + 0x0] $r8
- bclr $flags $p1
-queue_get_done:
- ret
-
-// nv_rd32 - read 32-bit value from nv register
-//
-// In : $r14 register
-// Out: $r15 value
-//
-nv_rd32:
- mov $r11 0x728
- shl b32 $r11 6
- mov b32 $r12 $r14
- bset $r12 31 // MMIO_CTRL_PENDING
- iowr I[$r11 + 0x000] $r12 // MMIO_CTRL
- nv_rd32_wait:
- iord $r12 I[$r11 + 0x000]
- xbit $r12 $r12 31
- bra ne #nv_rd32_wait
- mov $r10 6 // DONE_MMIO_RD
- call #wait_doneo
- iord $r15 I[$r11 + 0x100] // MMIO_RDVAL
- ret
-
-// nv_wr32 - write 32-bit value to nv register
-//
-// In : $r14 register
-// $r15 value
-//
-nv_wr32:
- mov $r11 0x728
- shl b32 $r11 6
- iowr I[$r11 + 0x200] $r15 // MMIO_WRVAL
- mov b32 $r12 $r14
- bset $r12 31 // MMIO_CTRL_PENDING
- bset $r12 30 // MMIO_CTRL_WRITE
- iowr I[$r11 + 0x000] $r12 // MMIO_CTRL
- nv_wr32_wait:
- iord $r12 I[$r11 + 0x000]
- xbit $r12 $r12 31
- bra ne #nv_wr32_wait
- ret
-
-// (re)set watchdog timer
-//
-// In : $r15 timeout
-//
-watchdog_reset:
- mov $r8 0x430
- shl b32 $r8 6
- bset $r15 31
- iowr I[$r8 + 0x000] $r15
- ret
-
-// clear watchdog timer
-watchdog_clear:
- mov $r8 0x430
- shl b32 $r8 6
- iowr I[$r8 + 0x000] $r0
- ret
-
-// wait_done{z,o} - wait on FUC_DONE bit to become clear/set
-//
-// In : $r10 bit to wait on
-//
-define(`wait_done', `
-$1:
- trace_set(T_WAIT);
- mov $r8 0x818
- shl b32 $r8 6
- iowr I[$r8 + 0x000] $r10 // CC_SCRATCH[6] = wait bit
- wait_done_$1:
- mov $r8 0x400
- shl b32 $r8 6
- iord $r8 I[$r8 + 0x000] // DONE
- xbit $r8 $r8 $r10
- bra $2 #wait_done_$1
- trace_clr(T_WAIT)
- ret
-')
-wait_done(wait_donez, ne)
-wait_done(wait_doneo, e)
-
-// mmctx_size - determine size of a mmio list transfer
-//
-// In : $r14 mmio list head
-// $r15 mmio list tail
-// Out: $r15 transfer size (in bytes)
-//
-mmctx_size:
- clear b32 $r9
- nv_mmctx_size_loop:
- ld b32 $r8 D[$r14]
- shr b32 $r8 26
- add b32 $r8 1
- shl b32 $r8 2
- add b32 $r9 $r8
- add b32 $r14 4
- cmpu b32 $r14 $r15
- bra ne #nv_mmctx_size_loop
- mov b32 $r15 $r9
- ret
-
-// mmctx_xfer - execute a list of mmio transfers
-//
-// In : $r10 flags
-// bit 0: direction (0 = save, 1 = load)
-// bit 1: set if first transfer
-// bit 2: set if last transfer
-// $r11 base
-// $r12 mmio list head
-// $r13 mmio list tail
-// $r14 multi_stride
-// $r15 multi_mask
-//
-mmctx_xfer:
- trace_set(T_MMCTX)
- mov $r8 0x710
- shl b32 $r8 6
- clear b32 $r9
- or $r11 $r11
- bra e #mmctx_base_disabled
- iowr I[$r8 + 0x000] $r11 // MMCTX_BASE
- bset $r9 0 // BASE_EN
- mmctx_base_disabled:
- or $r14 $r14
- bra e #mmctx_multi_disabled
- iowr I[$r8 + 0x200] $r14 // MMCTX_MULTI_STRIDE
- iowr I[$r8 + 0x300] $r15 // MMCTX_MULTI_MASK
- bset $r9 1 // MULTI_EN
- mmctx_multi_disabled:
- add b32 $r8 0x100
-
- xbit $r11 $r10 0
- shl b32 $r11 16 // DIR
- bset $r11 12 // QLIMIT = 0x10
- xbit $r14 $r10 1
- shl b32 $r14 17
- or $r11 $r14 // START_TRIGGER
- iowr I[$r8 + 0x000] $r11 // MMCTX_CTRL
-
- // loop over the mmio list, and send requests to the hw
- mmctx_exec_loop:
- // wait for space in mmctx queue
- mmctx_wait_free:
- iord $r14 I[$r8 + 0x000] // MMCTX_CTRL
- and $r14 0x1f
- bra e #mmctx_wait_free
-
- // queue up an entry
- ld b32 $r14 D[$r12]
- or $r14 $r9
- iowr I[$r8 + 0x300] $r14
- add b32 $r12 4
- cmpu b32 $r12 $r13
- bra ne #mmctx_exec_loop
-
- xbit $r11 $r10 2
- bra ne #mmctx_stop
- // wait for queue to empty
- mmctx_fini_wait:
- iord $r11 I[$r8 + 0x000] // MMCTX_CTRL
- and $r11 0x1f
- cmpu b32 $r11 0x10
- bra ne #mmctx_fini_wait
- mov $r10 2 // DONE_MMCTX
- call #wait_donez
- bra #mmctx_done
- mmctx_stop:
- xbit $r11 $r10 0
- shl b32 $r11 16 // DIR
- bset $r11 12 // QLIMIT = 0x10
- bset $r11 18 // STOP_TRIGGER
- iowr I[$r8 + 0x000] $r11 // MMCTX_CTRL
- mmctx_stop_wait:
- // wait for STOP_TRIGGER to clear
- iord $r11 I[$r8 + 0x000] // MMCTX_CTRL
- xbit $r11 $r11 18
- bra ne #mmctx_stop_wait
- mmctx_done:
- trace_clr(T_MMCTX)
- ret
-
-// Wait for DONE_STRAND
-//
-strand_wait:
- push $r10
- mov $r10 2
- call #wait_donez
- pop $r10
- ret
-
-// unknown - call before issuing strand commands
-//
-strand_pre:
- mov $r8 0x4afc
- sethi $r8 0x20000
- mov $r9 0xc
- iowr I[$r8] $r9
- call #strand_wait
- ret
-
-// unknown - call after issuing strand commands
-//
-strand_post:
- mov $r8 0x4afc
- sethi $r8 0x20000
- mov $r9 0xd
- iowr I[$r8] $r9
- call #strand_wait
- ret
-
-// Selects strand set?!
-//
-// In: $r14 id
-//
-strand_set:
- mov $r10 0x4ffc
- sethi $r10 0x20000
- sub b32 $r11 $r10 0x500
- mov $r12 0xf
- iowr I[$r10 + 0x000] $r12 // 0x93c = 0xf
- mov $r12 0xb
- iowr I[$r11 + 0x000] $r12 // 0x928 = 0xb
- call #strand_wait
- iowr I[$r10 + 0x000] $r14 // 0x93c = <id>
- mov $r12 0xa
- iowr I[$r11 + 0x000] $r12 // 0x928 = 0xa
- call #strand_wait
- ret
-
-// Initialise strand context data
-//
-// In : $r15 context base
-// Out: $r15 context size (in bytes)
-//
-// Strandset(?) 3 hardcoded currently
-//
-strand_ctx_init:
- trace_set(T_STRINIT)
- call #strand_pre
- mov $r14 3
- call #strand_set
- mov $r10 0x46fc
- sethi $r10 0x20000
- add b32 $r11 $r10 0x400
- iowr I[$r10 + 0x100] $r0 // STRAND_FIRST_GENE = 0
- mov $r12 1
- iowr I[$r11 + 0x000] $r12 // STRAND_CMD = LATCH_FIRST_GENE
- call #strand_wait
- sub b32 $r12 $r0 1
- iowr I[$r10 + 0x000] $r12 // STRAND_GENE_CNT = 0xffffffff
- mov $r12 2
- iowr I[$r11 + 0x000] $r12 // STRAND_CMD = LATCH_GENE_CNT
- call #strand_wait
- call #strand_post
-
- // read the size of each strand, poke the context offset of
- // each into STRAND_{SAVE,LOAD}_SWBASE now, no need to worry
- // about it later then.
- mov $r8 0x880
- shl b32 $r8 6
- iord $r9 I[$r8 + 0x000] // STRANDS
- add b32 $r8 0x2200
- shr b32 $r14 $r15 8
- ctx_init_strand_loop:
- iowr I[$r8 + 0x000] $r14 // STRAND_SAVE_SWBASE
- iowr I[$r8 + 0x100] $r14 // STRAND_LOAD_SWBASE
- iord $r10 I[$r8 + 0x200] // STRAND_SIZE
- shr b32 $r10 6
- add b32 $r10 1
- add b32 $r14 $r10
- add b32 $r8 4
- sub b32 $r9 1
- bra ne #ctx_init_strand_loop
-
- shl b32 $r14 8
- sub b32 $r15 $r14 $r15
- trace_clr(T_STRINIT)
- ret
-')
+++ /dev/null
-/* fuc microcode util functions for nve0 PGRAPH
- *
- * Copyright 2011 Red Hat Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors: Ben Skeggs
- */
-
-define(`mmctx_data', `.b32 eval((($2 - 1) << 26) | $1)')
-define(`queue_init', `.skip eval((2 * 4) + ((8 * 4) * 2))')
-
-ifdef(`include_code', `
-// Error codes
-define(`E_BAD_COMMAND', 0x01)
-define(`E_CMD_OVERFLOW', 0x02)
-
-// Util macros to help with debugging ucode hangs etc
-define(`T_WAIT', 0)
-define(`T_MMCTX', 1)
-define(`T_STRWAIT', 2)
-define(`T_STRINIT', 3)
-define(`T_AUTO', 4)
-define(`T_CHAN', 5)
-define(`T_LOAD', 6)
-define(`T_SAVE', 7)
-define(`T_LCHAN', 8)
-define(`T_LCTXH', 9)
-
-define(`trace_set', `
- mov $r8 0x83c
- shl b32 $r8 6
- clear b32 $r9
- bset $r9 $1
- iowr I[$r8 + 0x000] $r9 // CC_SCRATCH[7]
-')
-
-define(`trace_clr', `
- mov $r8 0x85c
- shl b32 $r8 6
- clear b32 $r9
- bset $r9 $1
- iowr I[$r8 + 0x000] $r9 // CC_SCRATCH[7]
-')
-
-// queue_put - add request to queue
-//
-// In : $r13 queue pointer
-// $r14 command
-// $r15 data
-//
-queue_put:
- // make sure we have space..
- ld b32 $r8 D[$r13 + 0x0] // GET
- ld b32 $r9 D[$r13 + 0x4] // PUT
- xor $r8 8
- cmpu b32 $r8 $r9
- bra ne #queue_put_next
- mov $r15 E_CMD_OVERFLOW
- call #error
- ret
-
- // store cmd/data on queue
- queue_put_next:
- and $r8 $r9 7
- shl b32 $r8 3
- add b32 $r8 $r13
- add b32 $r8 8
- st b32 D[$r8 + 0x0] $r14
- st b32 D[$r8 + 0x4] $r15
-
- // update PUT
- add b32 $r9 1
- and $r9 0xf
- st b32 D[$r13 + 0x4] $r9
- ret
-
-// queue_get - fetch request from queue
-//
-// In : $r13 queue pointer
-//
-// Out: $p1 clear on success (data available)
-// $r14 command
-// $r15 data
-//
-queue_get:
- bset $flags $p1
- ld b32 $r8 D[$r13 + 0x0] // GET
- ld b32 $r9 D[$r13 + 0x4] // PUT
- cmpu b32 $r8 $r9
- bra e #queue_get_done
- // fetch first cmd/data pair
- and $r9 $r8 7
- shl b32 $r9 3
- add b32 $r9 $r13
- add b32 $r9 8
- ld b32 $r14 D[$r9 + 0x0]
- ld b32 $r15 D[$r9 + 0x4]
-
- // update GET
- add b32 $r8 1
- and $r8 0xf
- st b32 D[$r13 + 0x0] $r8
- bclr $flags $p1
-queue_get_done:
- ret
-
-// nv_rd32 - read 32-bit value from nv register
-//
-// In : $r14 register
-// Out: $r15 value
-//
-nv_rd32:
- mov $r11 0x728
- shl b32 $r11 6
- mov b32 $r12 $r14
- bset $r12 31 // MMIO_CTRL_PENDING
- iowr I[$r11 + 0x000] $r12 // MMIO_CTRL
- nv_rd32_wait:
- iord $r12 I[$r11 + 0x000]
- xbit $r12 $r12 31
- bra ne #nv_rd32_wait
- mov $r10 6 // DONE_MMIO_RD
- call #wait_doneo
- iord $r15 I[$r11 + 0x100] // MMIO_RDVAL
- ret
-
-// nv_wr32 - write 32-bit value to nv register
-//
-// In : $r14 register
-// $r15 value
-//
-nv_wr32:
- mov $r11 0x728
- shl b32 $r11 6
- iowr I[$r11 + 0x200] $r15 // MMIO_WRVAL
- mov b32 $r12 $r14
- bset $r12 31 // MMIO_CTRL_PENDING
- bset $r12 30 // MMIO_CTRL_WRITE
- iowr I[$r11 + 0x000] $r12 // MMIO_CTRL
- nv_wr32_wait:
- iord $r12 I[$r11 + 0x000]
- xbit $r12 $r12 31
- bra ne #nv_wr32_wait
- ret
-
-// (re)set watchdog timer
-//
-// In : $r15 timeout
-//
-watchdog_reset:
- mov $r8 0x430
- shl b32 $r8 6
- bset $r15 31
- iowr I[$r8 + 0x000] $r15
- ret
-
-// clear watchdog timer
-watchdog_clear:
- mov $r8 0x430
- shl b32 $r8 6
- iowr I[$r8 + 0x000] $r0
- ret
-
-// wait_done{z,o} - wait on FUC_DONE bit to become clear/set
-//
-// In : $r10 bit to wait on
-//
-define(`wait_done', `
-$1:
- trace_set(T_WAIT);
- mov $r8 0x818
- shl b32 $r8 6
- iowr I[$r8 + 0x000] $r10 // CC_SCRATCH[6] = wait bit
- wait_done_$1:
- mov $r8 0x400
- shl b32 $r8 6
- iord $r8 I[$r8 + 0x000] // DONE
- xbit $r8 $r8 $r10
- bra $2 #wait_done_$1
- trace_clr(T_WAIT)
- ret
-')
-wait_done(wait_donez, ne)
-wait_done(wait_doneo, e)
-
-// mmctx_size - determine size of a mmio list transfer
-//
-// In : $r14 mmio list head
-// $r15 mmio list tail
-// Out: $r15 transfer size (in bytes)
-//
-mmctx_size:
- clear b32 $r9
- nv_mmctx_size_loop:
- ld b32 $r8 D[$r14]
- shr b32 $r8 26
- add b32 $r8 1
- shl b32 $r8 2
- add b32 $r9 $r8
- add b32 $r14 4
- cmpu b32 $r14 $r15
- bra ne #nv_mmctx_size_loop
- mov b32 $r15 $r9
- ret
-
-// mmctx_xfer - execute a list of mmio transfers
-//
-// In : $r10 flags
-// bit 0: direction (0 = save, 1 = load)
-// bit 1: set if first transfer
-// bit 2: set if last transfer
-// $r11 base
-// $r12 mmio list head
-// $r13 mmio list tail
-// $r14 multi_stride
-// $r15 multi_mask
-//
-mmctx_xfer:
- trace_set(T_MMCTX)
- mov $r8 0x710
- shl b32 $r8 6
- clear b32 $r9
- or $r11 $r11
- bra e #mmctx_base_disabled
- iowr I[$r8 + 0x000] $r11 // MMCTX_BASE
- bset $r9 0 // BASE_EN
- mmctx_base_disabled:
- or $r14 $r14
- bra e #mmctx_multi_disabled
- iowr I[$r8 + 0x200] $r14 // MMCTX_MULTI_STRIDE
- iowr I[$r8 + 0x300] $r15 // MMCTX_MULTI_MASK
- bset $r9 1 // MULTI_EN
- mmctx_multi_disabled:
- add b32 $r8 0x100
-
- xbit $r11 $r10 0
- shl b32 $r11 16 // DIR
- bset $r11 12 // QLIMIT = 0x10
- xbit $r14 $r10 1
- shl b32 $r14 17
- or $r11 $r14 // START_TRIGGER
- iowr I[$r8 + 0x000] $r11 // MMCTX_CTRL
-
- // loop over the mmio list, and send requests to the hw
- mmctx_exec_loop:
- // wait for space in mmctx queue
- mmctx_wait_free:
- iord $r14 I[$r8 + 0x000] // MMCTX_CTRL
- and $r14 0x1f
- bra e #mmctx_wait_free
-
- // queue up an entry
- ld b32 $r14 D[$r12]
- or $r14 $r9
- iowr I[$r8 + 0x300] $r14
- add b32 $r12 4
- cmpu b32 $r12 $r13
- bra ne #mmctx_exec_loop
-
- xbit $r11 $r10 2
- bra ne #mmctx_stop
- // wait for queue to empty
- mmctx_fini_wait:
- iord $r11 I[$r8 + 0x000] // MMCTX_CTRL
- and $r11 0x1f
- cmpu b32 $r11 0x10
- bra ne #mmctx_fini_wait
- mov $r10 2 // DONE_MMCTX
- call #wait_donez
- bra #mmctx_done
- mmctx_stop:
- xbit $r11 $r10 0
- shl b32 $r11 16 // DIR
- bset $r11 12 // QLIMIT = 0x10
- bset $r11 18 // STOP_TRIGGER
- iowr I[$r8 + 0x000] $r11 // MMCTX_CTRL
- mmctx_stop_wait:
- // wait for STOP_TRIGGER to clear
- iord $r11 I[$r8 + 0x000] // MMCTX_CTRL
- xbit $r11 $r11 18
- bra ne #mmctx_stop_wait
- mmctx_done:
- trace_clr(T_MMCTX)
- ret
-
-// Wait for DONE_STRAND
-//
-strand_wait:
- push $r10
- mov $r10 2
- call #wait_donez
- pop $r10
- ret
-
-// unknown - call before issuing strand commands
-//
-strand_pre:
- mov $r8 0x4afc
- sethi $r8 0x20000
- mov $r9 0xc
- iowr I[$r8] $r9
- call #strand_wait
- ret
-
-// unknown - call after issuing strand commands
-//
-strand_post:
- mov $r8 0x4afc
- sethi $r8 0x20000
- mov $r9 0xd
- iowr I[$r8] $r9
- call #strand_wait
- ret
-
-// Selects strand set?!
-//
-// In: $r14 id
-//
-strand_set:
- mov $r10 0x4ffc
- sethi $r10 0x20000
- sub b32 $r11 $r10 0x500
- mov $r12 0xf
- iowr I[$r10 + 0x000] $r12 // 0x93c = 0xf
- mov $r12 0xb
- iowr I[$r11 + 0x000] $r12 // 0x928 = 0xb
- call #strand_wait
- iowr I[$r10 + 0x000] $r14 // 0x93c = <id>
- mov $r12 0xa
- iowr I[$r11 + 0x000] $r12 // 0x928 = 0xa
- call #strand_wait
- ret
-
-// Initialise strand context data
-//
-// In : $r15 context base
-// Out: $r15 context size (in bytes)
-//
-// Strandset(?) 3 hardcoded currently
-//
-strand_ctx_init:
- trace_set(T_STRINIT)
- call #strand_pre
- mov $r14 3
- call #strand_set
- mov $r10 0x46fc
- sethi $r10 0x20000
- add b32 $r11 $r10 0x400
- iowr I[$r10 + 0x100] $r0 // STRAND_FIRST_GENE = 0
- mov $r12 1
- iowr I[$r11 + 0x000] $r12 // STRAND_CMD = LATCH_FIRST_GENE
- call #strand_wait
- sub b32 $r12 $r0 1
- iowr I[$r10 + 0x000] $r12 // STRAND_GENE_CNT = 0xffffffff
- mov $r12 2
- iowr I[$r11 + 0x000] $r12 // STRAND_CMD = LATCH_GENE_CNT
- call #strand_wait
- call #strand_post
-
- // read the size of each strand, poke the context offset of
- // each into STRAND_{SAVE,LOAD}_SWBASE now, no need to worry
- // about it later then.
- mov $r8 0x880
- shl b32 $r8 6
- iord $r9 I[$r8 + 0x000] // STRANDS
- add b32 $r8 0x2200
- shr b32 $r14 $r15 8
- ctx_init_strand_loop:
- iowr I[$r8 + 0x000] $r14 // STRAND_SAVE_SWBASE
- iowr I[$r8 + 0x100] $r14 // STRAND_LOAD_SWBASE
- iord $r10 I[$r8 + 0x200] // STRAND_SIZE
- shr b32 $r10 6
- add b32 $r10 1
- add b32 $r14 $r10
- add b32 $r8 4
- sub b32 $r9 1
- bra ne #ctx_init_strand_loop
-
- shl b32 $r14 8
- sub b32 $r15 $r14 $r15
- trace_clr(T_STRINIT)
- ret
-')
--- /dev/null
+#ifndef __NVKM_GRAPH_OS_H__
+#define __NVKM_GRAPH_OS_H__
+
+#define E_BAD_COMMAND 0x00000001
+#define E_CMD_OVERFLOW 0x00000002
+
+#endif