afa8aacce0572ce386a1b40f382bf2368401787a
[openwrt/staging/pepe2k.git] /
1 From a1902958d144d55309a1074f74fc9b3494d3042f Mon Sep 17 00:00:00 2001
2 From: John Cox <jc@kynesim.co.uk>
3 Date: Thu, 11 Mar 2021 19:08:00 +0000
4 Subject: [PATCH] media: rpivid: Add a Pass0 to accumulate slices and
5 rework job finish
6
7 Due to overheads in assembling controls and requests it is worth having
8 the slice assembly phase separate from the h/w pass1 processing. Create
9 a queue to service pass1 rather than have the pass1 finished callback
10 trigger the next slice job.
11
12 This requires a rework of the logic that splits up the buffer and
13 request done events. This code contains two ways of doing that; we use
14 Ezequiel Garcia's <ezequiel@collabora.com> solution, but expect that
15 in the future this will be handled by the framework in a cleaner manner.
16
17 Fix up the handling of some of the memory exhaustion crashes uncovered
18 in the process of writing this code.
19
20 Signed-off-by: John Cox <jc@kynesim.co.uk>
21 ---
22 drivers/media/v4l2-core/v4l2-mem2mem.c | 2 -
23 drivers/staging/media/rpivid/rpivid.c | 11 +-
24 drivers/staging/media/rpivid/rpivid.h | 20 +-
25 drivers/staging/media/rpivid/rpivid_dec.c | 32 +-
26 drivers/staging/media/rpivid/rpivid_h265.c | 432 ++++++++++++++++-----
27 drivers/staging/media/rpivid/rpivid_hw.c | 8 +-
28 6 files changed, 374 insertions(+), 131 deletions(-)
29
30 --- a/drivers/media/v4l2-core/v4l2-mem2mem.c
31 +++ b/drivers/media/v4l2-core/v4l2-mem2mem.c
32 @@ -492,8 +492,6 @@ void v4l2_m2m_job_finish(struct v4l2_m2m
33 * holding capture buffers. Those should use
34 * v4l2_m2m_buf_done_and_job_finish() instead.
35 */
36 - WARN_ON(m2m_ctx->out_q_ctx.q.subsystem_flags &
37 - VB2_V4L2_FL_SUPPORTS_M2M_HOLD_CAPTURE_BUF);
38 spin_lock_irqsave(&m2m_dev->job_spinlock, flags);
39 schedule_next = _v4l2_m2m_job_finish(m2m_dev, m2m_ctx);
40 spin_unlock_irqrestore(&m2m_dev->job_spinlock, flags);
41 --- a/drivers/staging/media/rpivid/rpivid.c
42 +++ b/drivers/staging/media/rpivid/rpivid.c
43 @@ -79,17 +79,24 @@ static const struct rpivid_control rpivi
44
45 #define rpivid_ctrls_COUNT ARRAY_SIZE(rpivid_ctrls)
46
47 -void *rpivid_find_control_data(struct rpivid_ctx *ctx, u32 id)
48 +struct v4l2_ctrl *rpivid_find_ctrl(struct rpivid_ctx *ctx, u32 id)
49 {
50 unsigned int i;
51
52 for (i = 0; ctx->ctrls[i]; i++)
53 if (ctx->ctrls[i]->id == id)
54 - return ctx->ctrls[i]->p_cur.p;
55 + return ctx->ctrls[i];
56
57 return NULL;
58 }
59
60 +void *rpivid_find_control_data(struct rpivid_ctx *ctx, u32 id)
61 +{
62 + struct v4l2_ctrl *const ctrl = rpivid_find_ctrl(ctx, id);
63 +
64 + return !ctrl ? NULL : ctrl->p_cur.p;
65 +}
66 +
67 static int rpivid_init_ctrls(struct rpivid_dev *dev, struct rpivid_ctx *ctx)
68 {
69 struct v4l2_ctrl_handler *hdl = &ctx->hdl;
70 --- a/drivers/staging/media/rpivid/rpivid.h
71 +++ b/drivers/staging/media/rpivid/rpivid.h
72 @@ -24,6 +24,10 @@
73
74 #define OPT_DEBUG_POLL_IRQ 0
75
76 +#define RPIVID_DEC_ENV_COUNT 6
77 +#define RPIVID_P1BUF_COUNT 3
78 +#define RPIVID_P2BUF_COUNT 3
79 +
80 #define RPIVID_NAME "rpivid"
81
82 #define RPIVID_CAPABILITY_UNTILED BIT(0)
83 @@ -45,6 +49,7 @@ struct rpivid_control {
84 };
85
86 struct rpivid_h265_run {
87 + u32 slice_ents;
88 const struct v4l2_ctrl_hevc_sps *sps;
89 const struct v4l2_ctrl_hevc_pps *pps;
90 const struct v4l2_ctrl_hevc_slice_params *slice_params;
91 @@ -64,7 +69,6 @@ struct rpivid_buffer {
92
93 struct rpivid_dec_state;
94 struct rpivid_dec_env;
95 -#define RPIVID_DEC_ENV_COUNT 3
96
97 struct rpivid_gptr {
98 size_t size;
99 @@ -79,7 +83,6 @@ typedef void (*rpivid_irq_callback)(stru
100 struct rpivid_q_aux;
101 #define RPIVID_AUX_ENT_COUNT VB2_MAX_FRAME
102
103 -#define RPIVID_P2BUF_COUNT 2
104
105 struct rpivid_ctx {
106 struct v4l2_fh fh;
107 @@ -108,11 +111,13 @@ struct rpivid_ctx {
108
109 struct rpivid_dec_env *dec_pool;
110
111 - /* Some of these should be in dev */
112 - struct rpivid_gptr bitbufs[1]; /* Will be 2 */
113 - struct rpivid_gptr cmdbufs[1]; /* Will be 2 */
114 + unsigned int p1idx;
115 + atomic_t p1out;
116 + struct rpivid_gptr bitbufs[RPIVID_P1BUF_COUNT];
117 + struct rpivid_gptr cmdbufs[RPIVID_P1BUF_COUNT];
118 +
119 + /* *** Should be in dev *** */
120 unsigned int p2idx;
121 - atomic_t p2out;
122 struct rpivid_gptr pu_bufs[RPIVID_P2BUF_COUNT];
123 struct rpivid_gptr coeff_bufs[RPIVID_P2BUF_COUNT];
124
125 @@ -141,6 +146,8 @@ struct rpivid_variant {
126
127 struct rpivid_hw_irq_ent;
128
129 +#define RPIVID_ICTL_ENABLE_UNLIMITED (-1)
130 +
131 struct rpivid_hw_irq_ctrl {
132 /* Spinlock protecting claim and tail */
133 spinlock_t lock;
134 @@ -182,6 +189,7 @@ struct rpivid_dev {
135
136 extern struct rpivid_dec_ops rpivid_dec_ops_h265;
137
138 +struct v4l2_ctrl *rpivid_find_ctrl(struct rpivid_ctx *ctx, u32 id);
139 void *rpivid_find_control_data(struct rpivid_ctx *ctx, u32 id);
140
141 #endif
142 --- a/drivers/staging/media/rpivid/rpivid_dec.c
143 +++ b/drivers/staging/media/rpivid/rpivid_dec.c
144 @@ -21,8 +21,8 @@
145
146 void rpivid_device_run(void *priv)
147 {
148 - struct rpivid_ctx *ctx = priv;
149 - struct rpivid_dev *dev = ctx->dev;
150 + struct rpivid_ctx *const ctx = priv;
151 + struct rpivid_dev *const dev = ctx->dev;
152 struct rpivid_run run = {};
153 struct media_request *src_req;
154
155 @@ -32,19 +32,17 @@ void rpivid_device_run(void *priv)
156 if (!run.src || !run.dst) {
157 v4l2_err(&dev->v4l2_dev, "%s: Missing buffer: src=%p, dst=%p\n",
158 __func__, run.src, run.dst);
159 - /* We are stuffed - this probably won't dig us out of our
160 - * current situation but it is better than nothing
161 - */
162 - v4l2_m2m_buf_done_and_job_finish(dev->m2m_dev, ctx->fh.m2m_ctx,
163 - VB2_BUF_STATE_ERROR);
164 - return;
165 + goto fail;
166 }
167
168 - /* Apply request(s) controls if needed. */
169 + /* Apply request(s) controls */
170 src_req = run.src->vb2_buf.req_obj.req;
171 + if (!src_req) {
172 + v4l2_err(&dev->v4l2_dev, "%s: Missing request\n", __func__);
173 + goto fail;
174 + }
175
176 - if (src_req)
177 - v4l2_ctrl_request_setup(src_req, &ctx->hdl);
178 + v4l2_ctrl_request_setup(src_req, &ctx->hdl);
179
180 switch (ctx->src_fmt.pixelformat) {
181 case V4L2_PIX_FMT_HEVC_SLICE:
182 @@ -70,10 +68,14 @@ void rpivid_device_run(void *priv)
183
184 dev->dec_ops->setup(ctx, &run);
185
186 - /* Complete request(s) controls if needed. */
187 -
188 - if (src_req)
189 - v4l2_ctrl_request_complete(src_req, &ctx->hdl);
190 + /* Complete request(s) controls */
191 + v4l2_ctrl_request_complete(src_req, &ctx->hdl);
192
193 dev->dec_ops->trigger(ctx);
194 + return;
195 +
196 +fail:
197 + /* We really shouldn't get here but tidy up what we can */
198 + v4l2_m2m_buf_done_and_job_finish(dev->m2m_dev, ctx->fh.m2m_ctx,
199 + VB2_BUF_STATE_ERROR);
200 }
201 --- a/drivers/staging/media/rpivid/rpivid_h265.c
202 +++ b/drivers/staging/media/rpivid/rpivid_h265.c
203 @@ -22,6 +22,8 @@
204 #define DEBUG_TRACE_P1_CMD 0
205 #define DEBUG_TRACE_EXECUTION 0
206
207 +#define USE_REQUEST_PIN 1
208 +
209 #if DEBUG_TRACE_EXECUTION
210 #define xtrace_in(dev_, de_)\
211 v4l2_info(&(dev_)->v4l2_dev, "%s[%d]: in\n", __func__,\
212 @@ -192,8 +194,6 @@ struct rpivid_dec_env {
213 unsigned int decode_order;
214 int p1_status; /* P1 status - what to realloc */
215
216 - struct rpivid_dec_env *phase_wait_q_next;
217 -
218 struct rpi_cmd *cmd_fifo;
219 unsigned int cmd_len, cmd_max;
220 unsigned int num_slice_msgs;
221 @@ -219,6 +219,7 @@ struct rpivid_dec_env {
222 u32 rpi_currpoc;
223
224 struct vb2_v4l2_buffer *frame_buf; // Detached dest buffer
225 + struct vb2_v4l2_buffer *src_buf; // Detached src buffer
226 unsigned int frame_c_offset;
227 unsigned int frame_stride;
228 dma_addr_t frame_addr;
229 @@ -235,9 +236,15 @@ struct rpivid_dec_env {
230 size_t bit_copy_len;
231 struct rpivid_gptr *cmd_copy_gptr;
232
233 - u16 slice_msgs[2 * HEVC_MAX_REFS * 8 + 3];
234 +#define SLICE_MSGS_MAX (2 * HEVC_MAX_REFS * 8 + 3)
235 + u16 slice_msgs[SLICE_MSGS_MAX];
236 u8 scaling_factors[NUM_SCALING_FACTORS];
237
238 +#if USE_REQUEST_PIN
239 + struct media_request *req_pin;
240 +#else
241 + struct media_request_object *req_obj;
242 +#endif
243 struct rpivid_hw_irq_ent irq_ent;
244 };
245
246 @@ -286,6 +293,17 @@ struct rpivid_dec_state {
247 unsigned int prev_ctb_y;
248 };
249
250 +#if !USE_REQUEST_PIN
251 +static void dst_req_obj_release(struct media_request_object *object)
252 +{
253 + kfree(object);
254 +}
255 +
256 +static const struct media_request_object_ops dst_req_obj_ops = {
257 + .release = dst_req_obj_release,
258 +};
259 +#endif
260 +
261 static inline int clip_int(const int x, const int lo, const int hi)
262 {
263 return x < lo ? lo : x > hi ? hi : x;
264 @@ -298,15 +316,48 @@ static inline int clip_int(const int x,
265 static int p1_z;
266 #endif
267
268 +static int cmds_check_space(struct rpivid_dec_env *const de, unsigned int n)
269 +{
270 + struct rpi_cmd *a;
271 + unsigned int newmax;
272 +
273 + if (n > 0x100000) {
274 + v4l2_err(&de->ctx->dev->v4l2_dev,
275 + "%s: n %u implausible\n", __func__, n);
276 + return -ENOMEM;
277 + }
278 +
279 + if (de->cmd_len + n <= de->cmd_max)
280 + return 0;
281 +
282 + newmax = 2 << log2_size(de->cmd_len + n);
283 +
284 + a = krealloc(de->cmd_fifo, newmax * sizeof(struct rpi_cmd),
285 + GFP_KERNEL);
286 + if (!a) {
287 + v4l2_err(&de->ctx->dev->v4l2_dev,
288 + "Failed cmd buffer realloc from %u to %u\n",
289 + de->cmd_max, newmax);
290 + return -ENOMEM;
291 + }
292 + v4l2_info(&de->ctx->dev->v4l2_dev,
293 + "cmd buffer realloc from %u to %u\n", de->cmd_max, newmax);
294 +
295 + de->cmd_fifo = a;
296 + de->cmd_max = newmax;
297 + return 0;
298 +}
299 +
300 // ???? u16 addr - put in u32
301 -static int p1_apb_write(struct rpivid_dec_env *const de, const u16 addr,
302 - const u32 data)
303 +static void p1_apb_write(struct rpivid_dec_env *const de, const u16 addr,
304 + const u32 data)
305 {
306 - if (de->cmd_len == de->cmd_max)
307 - de->cmd_fifo =
308 - krealloc(de->cmd_fifo,
309 - (de->cmd_max *= 2) * sizeof(struct rpi_cmd),
310 - GFP_KERNEL);
311 + if (de->cmd_len >= de->cmd_max) {
312 + v4l2_err(&de->ctx->dev->v4l2_dev,
313 + "%s: Overflow @ %d\n", __func__, de->cmd_len);
314 + return;
315 + }
316 +
317 de->cmd_fifo[de->cmd_len].addr = addr;
318 de->cmd_fifo[de->cmd_len].data = data;
319
320 @@ -316,8 +367,7 @@ static int p1_apb_write(struct rpivid_de
321 de->cmd_len, addr, data);
322 }
323 #endif
324 -
325 - return de->cmd_len++;
326 + de->cmd_len++;
327 }
328
329 static int ctb_to_tile(unsigned int ctb, unsigned int *bd, int num)
330 @@ -511,6 +561,7 @@ static const u8 prob_init[3][156] = {
331 },
332 };
333
334 +#define CMDS_WRITE_PROB ((RPI_PROB_ARRAY_SIZE / 4) + 1)
335 static void write_prob(struct rpivid_dec_env *const de,
336 const struct rpivid_dec_state *const s)
337 {
338 @@ -554,6 +605,7 @@ static void write_prob(struct rpivid_dec
339 p1_apb_write(de, RPI_TRANSFER, PROB_BACKUP);
340 }
341
342 +#define CMDS_WRITE_SCALING_FACTORS NUM_SCALING_FACTORS
343 static void write_scaling_factors(struct rpivid_dec_env *const de)
344 {
345 int i;
346 @@ -569,8 +621,9 @@ static inline __u32 dma_to_axi_addr(dma_
347 return (__u32)(a >> 6);
348 }
349
350 -static void write_bitstream(struct rpivid_dec_env *const de,
351 - const struct rpivid_dec_state *const s)
352 +#define CMDS_WRITE_BITSTREAM 4
353 +static int write_bitstream(struct rpivid_dec_env *const de,
354 + const struct rpivid_dec_state *const s)
355 {
356 // Note that FFmpeg V4L2 does not remove emulation prevention bytes,
357 // so this is matched in the configuration here.
358 @@ -584,6 +637,13 @@ static void write_bitstream(struct rpivi
359 if (s->src_addr != 0) {
360 addr = s->src_addr + offset;
361 } else {
362 + if (len + de->bit_copy_len > de->bit_copy_gptr->size) {
363 + v4l2_warn(&de->ctx->dev->v4l2_dev,
364 + "Bit copy buffer overflow: size=%zu, offset=%zu, len=%u\n",
365 + de->bit_copy_gptr->size,
366 + de->bit_copy_len, len);
367 + return -ENOMEM;
368 + }
369 memcpy(de->bit_copy_gptr->ptr + de->bit_copy_len,
370 s->src_buf + offset, len);
371 addr = de->bit_copy_gptr->addr + de->bit_copy_len;
372 @@ -595,6 +655,7 @@ static void write_bitstream(struct rpivi
373 p1_apb_write(de, RPI_BFNUM, len);
374 p1_apb_write(de, RPI_BFCONTROL, offset + (1 << 7)); // Stop
375 p1_apb_write(de, RPI_BFCONTROL, offset + (rpi_use_emu << 6));
376 + return 0;
377 }
378
379 //////////////////////////////////////////////////////////////////////////////
380 @@ -623,6 +684,7 @@ static u32 slice_reg_const(const struct
381
382 //////////////////////////////////////////////////////////////////////////////
383
384 +#define CMDS_NEW_SLICE_SEGMENT (4 + CMDS_WRITE_SCALING_FACTORS)
385 static void new_slice_segment(struct rpivid_dec_env *const de,
386 const struct rpivid_dec_state *const s)
387 {
388 @@ -706,6 +768,7 @@ static void msg_slice(struct rpivid_dec_
389 de->slice_msgs[de->num_slice_msgs++] = msg;
390 }
391
392 +#define CMDS_PROGRAM_SLICECMDS (1 + SLICE_MSGS_MAX)
393 static void program_slicecmds(struct rpivid_dec_env *const de,
394 const int sliceid)
395 {
396 @@ -902,6 +965,7 @@ static void pre_slice_decode(struct rpiv
397 (sh->slice_cb_qp_offset & 31)); // CMD_QPOFF
398 }
399
400 +#define CMDS_WRITE_SLICE 1
401 static void write_slice(struct rpivid_dec_env *const de,
402 const struct rpivid_dec_state *const s,
403 const u32 slice_const,
404 @@ -927,6 +991,7 @@ static void write_slice(struct rpivid_de
405 * N.B. This can be called to fill in data from the previous slice so must not
406 * use any state data that may change from slice to slice (e.g. qp)
407 */
408 +#define CMDS_NEW_ENTRY_POINT (6 + CMDS_WRITE_SLICE)
409 static void new_entry_point(struct rpivid_dec_env *const de,
410 const struct rpivid_dec_state *const s,
411 const bool do_bte,
412 @@ -977,6 +1042,7 @@ static void new_entry_point(struct rpivi
413 //////////////////////////////////////////////////////////////////////////////
414 // Wavefront mode
415
416 +#define CMDS_WPP_PAUSE 4
417 static void wpp_pause(struct rpivid_dec_env *const de, int ctb_row)
418 {
419 p1_apb_write(de, RPI_STATUS, (ctb_row << 18) | 0x25);
420 @@ -987,12 +1053,19 @@ static void wpp_pause(struct rpivid_dec_
421 p1_apb_write(de, RPI_CONTROL, (ctb_row << 16) + 2);
422 }
423
424 -static void wpp_entry_fill(struct rpivid_dec_env *const de,
425 - const struct rpivid_dec_state *const s,
426 - const unsigned int last_y)
427 +#define CMDS_WPP_ENTRY_FILL_1 (CMDS_WPP_PAUSE + 2 + CMDS_NEW_ENTRY_POINT)
428 +static int wpp_entry_fill(struct rpivid_dec_env *const de,
429 + const struct rpivid_dec_state *const s,
430 + const unsigned int last_y)
431 {
432 + int rv;
433 const unsigned int last_x = s->ctb_width - 1;
434
435 + rv = cmds_check_space(de, CMDS_WPP_ENTRY_FILL_1 *
436 + (last_y - de->entry_ctb_y));
437 + if (rv)
438 + return rv;
439 +
440 while (de->entry_ctb_y < last_y) {
441 /* wpp_entry_x/y set by wpp_entry_point */
442 if (s->ctb_width > 2)
443 @@ -1010,12 +1083,21 @@ static void wpp_entry_fill(struct rpivid
444 0, 0, 0, de->entry_ctb_y + 1,
445 de->entry_qp, de->entry_slice);
446 }
447 + return 0;
448 }
449
450 -static void wpp_end_previous_slice(struct rpivid_dec_env *const de,
451 - const struct rpivid_dec_state *const s)
452 +static int wpp_end_previous_slice(struct rpivid_dec_env *const de,
453 + const struct rpivid_dec_state *const s)
454 {
455 - wpp_entry_fill(de, s, s->prev_ctb_y);
456 + int rv;
457 +
458 + rv = wpp_entry_fill(de, s, s->prev_ctb_y);
459 + if (rv)
460 + return rv;
461 +
462 + rv = cmds_check_space(de, CMDS_WPP_PAUSE + 2);
463 + if (rv)
464 + return rv;
465
466 if (de->entry_ctb_x < 2 &&
467 (de->entry_ctb_y < s->start_ctb_y || s->start_ctb_x > 2) &&
468 @@ -1026,21 +1108,38 @@ static void wpp_end_previous_slice(struc
469 if (s->start_ctb_x == 2 ||
470 (s->ctb_width == 2 && de->entry_ctb_y < s->start_ctb_y))
471 p1_apb_write(de, RPI_TRANSFER, PROB_BACKUP);
472 + return 0;
473 }
474
475 /* Only main profile supported so WPP => !Tiles which makes some of the
476 * next chunk code simpler
477 */
478 -static void wpp_decode_slice(struct rpivid_dec_env *const de,
479 - const struct rpivid_dec_state *const s)
480 +static int wpp_decode_slice(struct rpivid_dec_env *const de,
481 + const struct rpivid_dec_state *const s)
482 {
483 bool reset_qp_y = true;
484 const bool indep = !s->dependent_slice_segment_flag;
485 + int rv;
486
487 - if (s->start_ts)
488 - wpp_end_previous_slice(de, s);
489 + if (s->start_ts) {
490 + rv = wpp_end_previous_slice(de, s);
491 + if (rv)
492 + return rv;
493 + }
494 pre_slice_decode(de, s);
495 - write_bitstream(de, s);
496 +
497 + rv = cmds_check_space(de,
498 + CMDS_WRITE_BITSTREAM +
499 + CMDS_WRITE_PROB +
500 + CMDS_PROGRAM_SLICECMDS +
501 + CMDS_NEW_SLICE_SEGMENT +
502 + CMDS_NEW_ENTRY_POINT);
503 + if (rv)
504 + return rv;
505 +
506 + rv = write_bitstream(de, s);
507 + if (rv)
508 + return rv;
509
510 if (!s->start_ts || indep || s->ctb_width == 1)
511 write_prob(de, s);
512 @@ -1056,7 +1155,13 @@ static void wpp_decode_slice(struct rpiv
513 s->slice_qp, slice_reg_const(s));
514
515 if (s->frame_end) {
516 - wpp_entry_fill(de, s, s->ctb_height - 1);
517 + rv = wpp_entry_fill(de, s, s->ctb_height - 1);
518 + if (rv)
519 + return rv;
520 +
521 + rv = cmds_check_space(de, CMDS_WPP_PAUSE + 1);
522 + if (rv)
523 + return rv;
524
525 if (de->entry_ctb_x < 2 && s->ctb_width > 2)
526 wpp_pause(de, s->ctb_height - 1);
527 @@ -1065,25 +1170,32 @@ static void wpp_decode_slice(struct rpiv
528 1 | ((s->ctb_width - 1) << 5) |
529 ((s->ctb_height - 1) << 18));
530 }
531 -
532 + return 0;
533 }
534
535 //////////////////////////////////////////////////////////////////////////////
536 // Tiles mode
537
538 -static void tile_entry_fill(struct rpivid_dec_env *const de,
539 - const struct rpivid_dec_state *const s,
540 - const unsigned int last_tile_x,
541 - const unsigned int last_tile_y)
542 +// Guarantees 1 cmd entry free on exit
543 +static int tile_entry_fill(struct rpivid_dec_env *const de,
544 + const struct rpivid_dec_state *const s,
545 + const unsigned int last_tile_x,
546 + const unsigned int last_tile_y)
547 {
548 while (de->entry_tile_y < last_tile_y ||
549 (de->entry_tile_y == last_tile_y &&
550 de->entry_tile_x < last_tile_x)) {
551 + int rv;
552 unsigned int t_x = de->entry_tile_x;
553 unsigned int t_y = de->entry_tile_y;
554 const unsigned int last_x = s->col_bd[t_x + 1] - 1;
555 const unsigned int last_y = s->row_bd[t_y + 1] - 1;
556
557 + // One more than needed here
558 + rv = cmds_check_space(de, CMDS_NEW_ENTRY_POINT + 3);
559 + if (rv)
560 + return rv;
561 +
562 p1_apb_write(de, RPI_STATUS,
563 2 | (last_x << 5) | (last_y << 18));
564 p1_apb_write(de, RPI_TRANSFER, PROB_RELOAD);
565 @@ -1098,33 +1210,55 @@ static void tile_entry_fill(struct rpivi
566 t_x, t_y, s->col_bd[t_x], s->row_bd[t_y],
567 de->entry_qp, de->entry_slice);
568 }
569 + return 0;
570 }
571
572 /*
573 * Write STATUS register with expected end CTU address of previous slice
574 */
575 -static void end_previous_slice(struct rpivid_dec_env *const de,
576 - const struct rpivid_dec_state *const s)
577 +static int end_previous_slice(struct rpivid_dec_env *const de,
578 + const struct rpivid_dec_state *const s)
579 {
580 - tile_entry_fill(de, s,
581 - ctb_to_tile_x(s, s->prev_ctb_x),
582 - ctb_to_tile_y(s, s->prev_ctb_y));
583 + int rv;
584 +
585 + rv = tile_entry_fill(de, s,
586 + ctb_to_tile_x(s, s->prev_ctb_x),
587 + ctb_to_tile_y(s, s->prev_ctb_y));
588 + if (rv)
589 + return rv;
590 +
591 p1_apb_write(de, RPI_STATUS,
592 1 | (s->prev_ctb_x << 5) | (s->prev_ctb_y << 18));
593 + return 0;
594 }
595
596 -static void decode_slice(struct rpivid_dec_env *const de,
597 - const struct rpivid_dec_state *const s)
598 +static int decode_slice(struct rpivid_dec_env *const de,
599 + const struct rpivid_dec_state *const s)
600 {
601 bool reset_qp_y;
602 unsigned int tile_x = ctb_to_tile_x(s, s->start_ctb_x);
603 unsigned int tile_y = ctb_to_tile_y(s, s->start_ctb_y);
604 + int rv;
605
606 - if (s->start_ts)
607 - end_previous_slice(de, s);
608 + if (s->start_ts) {
609 + rv = end_previous_slice(de, s);
610 + if (rv)
611 + return rv;
612 + }
613 +
614 + rv = cmds_check_space(de,
615 + CMDS_WRITE_BITSTREAM +
616 + CMDS_WRITE_PROB +
617 + CMDS_PROGRAM_SLICECMDS +
618 + CMDS_NEW_SLICE_SEGMENT +
619 + CMDS_NEW_ENTRY_POINT);
620 + if (rv)
621 + return rv;
622
623 pre_slice_decode(de, s);
624 - write_bitstream(de, s);
625 + rv = write_bitstream(de, s);
626 + if (rv)
627 + return rv;
628
629 reset_qp_y = !s->start_ts ||
630 !s->dependent_slice_segment_flag ||
631 @@ -1146,13 +1280,16 @@ static void decode_slice(struct rpivid_d
632 * when it will be known where this slice finishes
633 */
634 if (s->frame_end) {
635 - tile_entry_fill(de, s,
636 - s->tile_width - 1,
637 - s->tile_height - 1);
638 + rv = tile_entry_fill(de, s,
639 + s->tile_width - 1,
640 + s->tile_height - 1);
641 + if (rv)
642 + return rv;
643 p1_apb_write(de, RPI_STATUS,
644 1 | ((s->ctb_width - 1) << 5) |
645 ((s->ctb_height - 1) << 18));
646 }
647 + return 0;
648 }
649
650 //////////////////////////////////////////////////////////////////////////////
651 @@ -1524,7 +1661,7 @@ static void rpivid_h265_setup(struct rpi
652 struct rpivid_dev *const dev = ctx->dev;
653 const struct v4l2_ctrl_hevc_slice_params *const sh =
654 run->h265.slice_params;
655 - const struct v4l2_hevc_pred_weight_table *pred_weight_table;
656 +// const struct v4l2_hevc_pred_weight_table *pred_weight_table;
657 struct rpivid_q_aux *dpb_q_aux[V4L2_HEVC_DPB_ENTRIES_NUM_MAX];
658 struct rpivid_dec_state *const s = ctx->state;
659 struct vb2_queue *vq;
660 @@ -1532,11 +1669,12 @@ static void rpivid_h265_setup(struct rpi
661 unsigned int prev_rs;
662 unsigned int i;
663 int use_aux;
664 + int rv;
665 bool slice_temporal_mvp;
666
667 xtrace_in(dev, de);
668
669 - pred_weight_table = &sh->pred_weight_table;
670 +// pred_weight_table = &sh->pred_weight_table;
671
672 s->frame_end =
673 ((run->src->flags & V4L2_BUF_FLAG_M2M_HOLD_CAPTURE_BUF) == 0);
674 @@ -1608,9 +1746,9 @@ static void rpivid_h265_setup(struct rpi
675 de->cmd_len = 0;
676 de->dpbno_col = ~0U;
677
678 - de->bit_copy_gptr = ctx->bitbufs + 0;
679 + de->bit_copy_gptr = ctx->bitbufs + ctx->p1idx;
680 de->bit_copy_len = 0;
681 - de->cmd_copy_gptr = ctx->cmdbufs + 0;
682 + de->cmd_copy_gptr = ctx->cmdbufs + ctx->p1idx;
683
684 de->frame_c_offset = ctx->dst_fmt.height * 128;
685 de->frame_stride = ctx->dst_fmt.plane_fmt[0].bytesperline * 128;
686 @@ -1727,6 +1865,9 @@ static void rpivid_h265_setup(struct rpi
687 bits_alloc = wxh < 983040 ? wxh * 3 / 4 :
688 wxh < 983040 * 2 ? 983040 * 3 / 4 :
689 wxh * 3 / 8;
690 + /* Allow for bit depth */
691 + bits_alloc += (bits_alloc *
692 + s->sps.bit_depth_luma_minus8) / 8;
693 bits_alloc = round_up_size(bits_alloc);
694
695 if (gptr_alloc(dev, de->bit_copy_gptr,
696 @@ -1743,18 +1884,35 @@ static void rpivid_h265_setup(struct rpi
697 }
698 }
699
700 - // Pre calc a few things
701 - s->src_addr =
702 - !s->frame_end ?
703 - 0 :
704 - vb2_dma_contig_plane_dma_addr(&run->src->vb2_buf, 0);
705 - s->src_buf = s->src_addr != 0 ? NULL :
706 - vb2_plane_vaddr(&run->src->vb2_buf, 0);
707 + // Either map src buffer or use directly
708 + s->src_addr = 0;
709 + s->src_buf = NULL;
710 +
711 + if (run->src->planes[0].bytesused < (sh->bit_size + 7) / 8) {
712 + v4l2_warn(&dev->v4l2_dev,
713 + "Bit size %d > bytesused %d\n",
714 + sh->bit_size, run->src->planes[0].bytesused);
715 + goto fail;
716 + }
717 + if (sh->data_bit_offset >= sh->bit_size ||
718 + sh->bit_size - sh->data_bit_offset < 8) {
719 + v4l2_warn(&dev->v4l2_dev,
720 + "Bit size %d < Bit offset %d + 8\n",
721 + sh->bit_size, sh->data_bit_offset);
722 + goto fail;
723 + }
724 +
725 + if (s->frame_end)
726 + s->src_addr = vb2_dma_contig_plane_dma_addr(&run->src->vb2_buf,
727 + 0);
728 + if (!s->src_addr)
729 + s->src_buf = vb2_plane_vaddr(&run->src->vb2_buf, 0);
730 if (!s->src_addr && !s->src_buf) {
731 v4l2_err(&dev->v4l2_dev, "Failed to map src buffer\n");
732 goto fail;
733 }
734
735 + // Pre calc a few things
736 s->sh = sh;
737 s->slice_qp = 26 + s->pps.init_qp_minus26 + s->sh->slice_qp_delta;
738 s->max_num_merge_cand = sh->slice_type == HEVC_SLICE_I ?
739 @@ -1785,9 +1943,11 @@ static void rpivid_h265_setup(struct rpi
740 s->prev_ctb_y = prev_rs / de->pic_width_in_ctbs_y;
741
742 if ((s->pps.flags & V4L2_HEVC_PPS_FLAG_ENTROPY_CODING_SYNC_ENABLED))
743 - wpp_decode_slice(de, s);
744 + rv = wpp_decode_slice(de, s);
745 else
746 - decode_slice(de, s);
747 + rv = decode_slice(de, s);
748 + if (rv)
749 + goto fail;
750
751 if (!s->frame_end) {
752 xtrace_ok(dev, de);
753 @@ -1945,29 +2105,28 @@ static int check_status(const struct rpi
754 return -1;
755 }
756
757 -static void cb_phase2(struct rpivid_dev *const dev, void *v)
758 +static void phase2_cb(struct rpivid_dev *const dev, void *v)
759 {
760 struct rpivid_dec_env *const de = v;
761 - struct rpivid_ctx *const ctx = de->ctx;
762
763 xtrace_in(dev, de);
764
765 - v4l2_m2m_cap_buf_return(dev->m2m_dev, ctx->fh.m2m_ctx, de->frame_buf,
766 - VB2_BUF_STATE_DONE);
767 - de->frame_buf = NULL;
768 + /* Done with buffers - allow new P1 */
769 + rpivid_hw_irq_active1_enable_claim(dev, 1);
770
771 - /* Delete de before finish as finish might immediately trigger a reuse
772 - * of de
773 - */
774 - dec_env_delete(de);
775 + v4l2_m2m_buf_done(de->frame_buf, VB2_BUF_STATE_DONE);
776 + de->frame_buf = NULL;
777
778 - if (atomic_add_return(-1, &ctx->p2out) >= RPIVID_P2BUF_COUNT - 1) {
779 - xtrace_fin(dev, de);
780 - v4l2_m2m_buf_done_and_job_finish(dev->m2m_dev, ctx->fh.m2m_ctx,
781 - VB2_BUF_STATE_DONE);
782 - }
783 +#if USE_REQUEST_PIN
784 + media_request_unpin(de->req_pin);
785 + de->req_pin = NULL;
786 +#else
787 + media_request_object_complete(de->req_obj);
788 + de->req_obj = NULL;
789 +#endif
790
791 xtrace_ok(dev, de);
792 + dec_env_delete(de);
793 }
794
795 static void phase2_claimed(struct rpivid_dev *const dev, void *v)
796 @@ -2023,7 +2182,7 @@ static void phase2_claimed(struct rpivid
797 // de->ctx->colmvbuf.addr, de->ctx->colmvbuf.addr +
798 // de->ctx->colmvbuf.size);
799
800 - rpivid_hw_irq_active2_irq(dev, &de->irq_ent, cb_phase2, de);
801 + rpivid_hw_irq_active2_irq(dev, &de->irq_ent, phase2_cb, de);
802
803 apb_write_final(dev, RPI_NUMROWS, de->pic_height_in_ctbs_y);
804
805 @@ -2032,6 +2191,39 @@ static void phase2_claimed(struct rpivid
806
807 static void phase1_claimed(struct rpivid_dev *const dev, void *v);
808
809 +// release any and all objects associated with de
810 +// and reenable phase 1 if required
811 +static void phase1_err_fin(struct rpivid_dev *const dev,
812 + struct rpivid_ctx *const ctx,
813 + struct rpivid_dec_env *const de)
814 +{
815 + /* Return all detached buffers */
816 + if (de->src_buf)
817 + v4l2_m2m_buf_done(de->src_buf, VB2_BUF_STATE_ERROR);
818 + de->src_buf = NULL;
819 + if (de->frame_buf)
820 + v4l2_m2m_buf_done(de->frame_buf, VB2_BUF_STATE_ERROR);
821 + de->frame_buf = NULL;
822 +#if USE_REQUEST_PIN
823 + if (de->req_pin)
824 + media_request_unpin(de->req_pin);
825 + de->req_pin = NULL;
826 +#else
827 + if (de->req_obj)
828 + media_request_object_complete(de->req_obj);
829 + de->req_obj = NULL;
830 +#endif
831 +
832 + dec_env_delete(de);
833 +
834 + /* Reenable phase 0 if we were blocking */
835 + if (atomic_add_return(-1, &ctx->p1out) >= RPIVID_P1BUF_COUNT - 1)
836 + v4l2_m2m_job_finish(dev->m2m_dev, ctx->fh.m2m_ctx);
837 +
838 + /* Done with P1-P2 buffers - allow new P1 */
839 + rpivid_hw_irq_active1_enable_claim(dev, 1);
840 +}
841 +
842 static void phase1_thread(struct rpivid_dev *const dev, void *v)
843 {
844 struct rpivid_dec_env *const de = v;
845 @@ -2076,15 +2268,12 @@ fail:
846 __func__);
847 ctx->fatal_err = 1;
848 }
849 - dec_env_delete(de);
850 - xtrace_fin(dev, de);
851 - v4l2_m2m_buf_done_and_job_finish(dev->m2m_dev, ctx->fh.m2m_ctx,
852 - VB2_BUF_STATE_ERROR);
853 xtrace_fail(dev, de);
854 + phase1_err_fin(dev, ctx, de);
855 }
856
857 /* Always called in irq context (this is good) */
858 -static void cb_phase1(struct rpivid_dev *const dev, void *v)
859 +static void phase1_cb(struct rpivid_dev *const dev, void *v)
860 {
861 struct rpivid_dec_env *const de = v;
862 struct rpivid_ctx *const ctx = de->ctx;
863 @@ -2092,6 +2281,7 @@ static void cb_phase1(struct rpivid_dev
864 xtrace_in(dev, de);
865
866 de->p1_status = check_status(dev);
867 +
868 if (de->p1_status != 0) {
869 v4l2_info(&dev->v4l2_dev, "%s: Post wait: %#x\n",
870 __func__, de->p1_status);
871 @@ -2105,24 +2295,17 @@ static void cb_phase1(struct rpivid_dev
872 return;
873 }
874
875 - /* After the frame-buf is detached it must be returned but from
876 - * this point onward (phase2_claimed, cb_phase2) there are no error
877 - * paths so the return at the end of cb_phase2 is all that is needed
878 - */
879 - de->frame_buf = v4l2_m2m_cap_buf_detach(dev->m2m_dev, ctx->fh.m2m_ctx);
880 - if (!de->frame_buf) {
881 - v4l2_err(&dev->v4l2_dev, "%s: No detached buffer\n", __func__);
882 - goto fail;
883 - }
884 + v4l2_m2m_buf_done(de->src_buf, VB2_BUF_STATE_DONE);
885 + de->src_buf = NULL;
886
887 + /* All phase1 error paths done - it is safe to inc p2idx */
888 ctx->p2idx =
889 (ctx->p2idx + 1 >= RPIVID_P2BUF_COUNT) ? 0 : ctx->p2idx + 1;
890
891 - // Enable the next setup if our Q isn't too big
892 - if (atomic_add_return(1, &ctx->p2out) < RPIVID_P2BUF_COUNT) {
893 + /* Re-enable the next setup if we were blocking */
894 + if (atomic_add_return(-1, &ctx->p1out) >= RPIVID_P1BUF_COUNT - 1) {
895 xtrace_fin(dev, de);
896 - v4l2_m2m_buf_done_and_job_finish(dev->m2m_dev, ctx->fh.m2m_ctx,
897 - VB2_BUF_STATE_DONE);
898 + v4l2_m2m_job_finish(dev->m2m_dev, ctx->fh.m2m_ctx);
899 }
900
901 rpivid_hw_irq_active2_claim(dev, &de->irq_ent, phase2_claimed, de);
902 @@ -2131,11 +2314,8 @@ static void cb_phase1(struct rpivid_dev
903 return;
904
905 fail:
906 - dec_env_delete(de);
907 - xtrace_fin(dev, de);
908 - v4l2_m2m_buf_done_and_job_finish(dev->m2m_dev, ctx->fh.m2m_ctx,
909 - VB2_BUF_STATE_ERROR);
910 xtrace_fail(dev, de);
911 + phase1_err_fin(dev, ctx, de);
912 }
913
914 static void phase1_claimed(struct rpivid_dev *const dev, void *v)
915 @@ -2160,6 +2340,10 @@ static void phase1_claimed(struct rpivid
916 de->coeff_stride =
917 ALIGN_DOWN(coeff_gptr->size / de->pic_height_in_ctbs_y, 64);
918
919 + /* phase1_claimed blocked until cb_phase1 completed so p2idx inc
920 + * in cb_phase1 after error detection
921 + */
922 +
923 apb_write_vc_addr(dev, RPI_PUWBASE, de->pu_base_vc);
924 apb_write_vc_len(dev, RPI_PUWSTRIDE, de->pu_stride);
925 apb_write_vc_addr(dev, RPI_COEFFWBASE, de->coeff_base_vc);
926 @@ -2169,7 +2353,7 @@ static void phase1_claimed(struct rpivid
927 apb_write(dev, RPI_CFNUM, de->cmd_len);
928
929 // Claim irq
930 - rpivid_hw_irq_active1_irq(dev, &de->irq_ent, cb_phase1, de);
931 + rpivid_hw_irq_active1_irq(dev, &de->irq_ent, phase1_cb, de);
932
933 // And start the h/w
934 apb_write_vc_addr_final(dev, RPI_CFBASE, de->cmd_copy_gptr->addr);
935 @@ -2178,11 +2362,8 @@ static void phase1_claimed(struct rpivid
936 return;
937
938 fail:
939 - dec_env_delete(de);
940 - xtrace_fin(dev, de);
941 - v4l2_m2m_buf_done_and_job_finish(dev->m2m_dev, ctx->fh.m2m_ctx,
942 - VB2_BUF_STATE_ERROR);
943 xtrace_fail(dev, de);
944 + phase1_err_fin(dev, ctx, de);
945 }
946
947 static void dec_state_delete(struct rpivid_ctx *const ctx)
948 @@ -2315,7 +2496,9 @@ static void rpivid_h265_trigger(struct r
949 case RPIVID_DECODE_SLICE_CONTINUE:
950 v4l2_m2m_buf_done_and_job_finish(dev->m2m_dev, ctx->fh.m2m_ctx,
951 VB2_BUF_STATE_DONE);
952 + xtrace_ok(dev, de);
953 break;
954 +
955 default:
956 v4l2_err(&dev->v4l2_dev, "%s: Unexpected state: %d\n", __func__,
957 de->state);
958 @@ -2329,14 +2512,59 @@ static void rpivid_h265_trigger(struct r
959 v4l2_m2m_buf_done_and_job_finish(dev->m2m_dev, ctx->fh.m2m_ctx,
960 VB2_BUF_STATE_ERROR);
961 break;
962 +
963 case RPIVID_DECODE_PHASE1:
964 ctx->dec0 = NULL;
965 +
966 +#if !USE_REQUEST_PIN
967 + /* Alloc a new request object - needs to be alloced dynamically
968 + * as the media request will release it some random time after
969 + * it is completed
970 + */
971 + de->req_obj = kmalloc(sizeof(*de->req_obj), GFP_KERNEL);
972 + if (!de->req_obj) {
973 + xtrace_fail(dev, de);
974 + dec_env_delete(de);
975 + v4l2_m2m_buf_done_and_job_finish(dev->m2m_dev,
976 + ctx->fh.m2m_ctx,
977 + VB2_BUF_STATE_ERROR);
978 + break;
979 + }
980 + media_request_object_init(de->req_obj);
981 +#warning probably needs to _get the req obj too
982 +#endif
983 + ctx->p1idx = (ctx->p1idx + 1 >= RPIVID_P1BUF_COUNT) ?
984 + 0 : ctx->p1idx + 1;
985 +
986 + /* We know we have src & dst so no need to test */
987 + de->src_buf = v4l2_m2m_src_buf_remove(ctx->fh.m2m_ctx);
988 + de->frame_buf = v4l2_m2m_dst_buf_remove(ctx->fh.m2m_ctx);
989 +
990 +#if USE_REQUEST_PIN
991 + de->req_pin = de->src_buf->vb2_buf.req_obj.req;
992 + media_request_pin(de->req_pin);
993 +#else
994 + media_request_object_bind(de->src_buf->vb2_buf.req_obj.req,
995 + &dst_req_obj_ops, de, false,
996 + de->req_obj);
997 +#endif
998 +
999 + /* We could get rid of the src buffer here if we've already
1000 + * copied it, but we don't copy the last buffer unless it
1001 + * didn't return a contig dma addr and that shouldn't happen
1002 + */
1003 +
1004 + /* Enable the next setup if our Q isn't too big */
1005 + if (atomic_add_return(1, &ctx->p1out) < RPIVID_P1BUF_COUNT) {
1006 + xtrace_fin(dev, de);
1007 + v4l2_m2m_job_finish(dev->m2m_dev, ctx->fh.m2m_ctx);
1008 + }
1009 +
1010 rpivid_hw_irq_active1_claim(dev, &de->irq_ent, phase1_claimed,
1011 de);
1012 + xtrace_ok(dev, de);
1013 break;
1014 }
1015 -
1016 - xtrace_ok(dev, de);
1017 }
1018
1019 struct rpivid_dec_ops rpivid_dec_ops_h265 = {
1020 --- a/drivers/staging/media/rpivid/rpivid_hw.c
1021 +++ b/drivers/staging/media/rpivid/rpivid_hw.c
1022 @@ -185,14 +185,14 @@ static void do_enable_claim(struct rpivi
1023 sched_cb(dev, ictl, ient);
1024 }
1025
1026 -static void ictl_init(struct rpivid_hw_irq_ctrl * const ictl)
1027 +static void ictl_init(struct rpivid_hw_irq_ctrl * const ictl, int enables)
1028 {
1029 spin_lock_init(&ictl->lock);
1030 ictl->claim = NULL;
1031 ictl->tail = NULL;
1032 ictl->irq = NULL;
1033 ictl->no_sched = 0;
1034 - ictl->enable = -1;
1035 + ictl->enable = enables;
1036 ictl->thread_reqed = false;
1037 }
1038
1039 @@ -308,8 +308,8 @@ int rpivid_hw_probe(struct rpivid_dev *d
1040 int irq_dec;
1041 int ret = 0;
1042
1043 - ictl_init(&dev->ic_active1);
1044 - ictl_init(&dev->ic_active2);
1045 + ictl_init(&dev->ic_active1, RPIVID_P2BUF_COUNT);
1046 + ictl_init(&dev->ic_active2, RPIVID_ICTL_ENABLE_UNLIMITED);
1047
1048 res = platform_get_resource_byname(dev->pdev, IORESOURCE_MEM, "intc");
1049 if (!res)