summaryrefslogtreecommitdiffstats
path: root/drivers/spi/spi-qcom-qspi.c
blob: a3991e617c907c4bb3b68dbe9f810dc870660047 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
// SPDX-License-Identifier: GPL-2.0
// Copyright (c) 2017-2018, The Linux foundation. All rights reserved.

#include <linux/clk.h>
#include <linux/dmapool.h>
#include <linux/dma-mapping.h>
#include <linux/interconnect.h>
#include <linux/interrupt.h>
#include <linux/io.h>
#include <linux/module.h>
#include <linux/of.h>
#include <linux/of_platform.h>
#include <linux/pinctrl/consumer.h>
#include <linux/pm_runtime.h>
#include <linux/pm_opp.h>
#include <linux/spi/spi.h>
#include <linux/spi/spi-mem.h>


#define QSPI_NUM_CS		2
#define QSPI_BYTES_PER_WORD	4

#define MSTR_CONFIG		0x0000
#define FULL_CYCLE_MODE		BIT(3)
#define FB_CLK_EN		BIT(4)
#define PIN_HOLDN		BIT(6)
#define PIN_WPN			BIT(7)
#define DMA_ENABLE		BIT(8)
#define BIG_ENDIAN_MODE		BIT(9)
#define SPI_MODE_MSK		0xc00
#define SPI_MODE_SHFT		10
#define CHIP_SELECT_NUM		BIT(12)
#define SBL_EN			BIT(13)
#define LPA_BASE_MSK		0x3c000
#define LPA_BASE_SHFT		14
#define TX_DATA_DELAY_MSK	0xc0000
#define TX_DATA_DELAY_SHFT	18
#define TX_CLK_DELAY_MSK	0x300000
#define TX_CLK_DELAY_SHFT	20
#define TX_CS_N_DELAY_MSK	0xc00000
#define TX_CS_N_DELAY_SHFT	22
#define TX_DATA_OE_DELAY_MSK	0x3000000
#define TX_DATA_OE_DELAY_SHFT	24

#define AHB_MASTER_CFG				0x0004
#define HMEM_TYPE_START_MID_TRANS_MSK		0x7
#define HMEM_TYPE_START_MID_TRANS_SHFT		0
#define HMEM_TYPE_LAST_TRANS_MSK		0x38
#define HMEM_TYPE_LAST_TRANS_SHFT		3
#define USE_HMEMTYPE_LAST_ON_DESC_OR_CHAIN_MSK	0xc0
#define USE_HMEMTYPE_LAST_ON_DESC_OR_CHAIN_SHFT	6
#define HMEMTYPE_READ_TRANS_MSK			0x700
#define HMEMTYPE_READ_TRANS_SHFT		8
#define HSHARED					BIT(11)
#define HINNERSHARED				BIT(12)

#define MSTR_INT_EN		0x000C
#define MSTR_INT_STATUS		0x0010
#define RESP_FIFO_UNDERRUN	BIT(0)
#define RESP_FIFO_NOT_EMPTY	BIT(1)
#define RESP_FIFO_RDY		BIT(2)
#define HRESP_FROM_NOC_ERR	BIT(3)
#define WR_FIFO_EMPTY		BIT(9)
#define WR_FIFO_FULL		BIT(10)
#define WR_FIFO_OVERRUN		BIT(11)
#define TRANSACTION_DONE	BIT(16)
#define DMA_CHAIN_DONE		BIT(31)
#define QSPI_ERR_IRQS		(RESP_FIFO_UNDERRUN | HRESP_FROM_NOC_ERR | \
				 WR_FIFO_OVERRUN)
#define QSPI_ALL_IRQS		(QSPI_ERR_IRQS | RESP_FIFO_RDY | \
				 WR_FIFO_EMPTY | WR_FIFO_FULL | \
				 TRANSACTION_DONE)

#define PIO_XFER_CTRL		0x0014
#define REQUEST_COUNT_MSK	0xffff

#define PIO_XFER_CFG		0x0018
#define TRANSFER_DIRECTION	BIT(0)
#define MULTI_IO_MODE_MSK	0xe
#define MULTI_IO_MODE_SHFT	1
#define TRANSFER_FRAGMENT	BIT(8)
#define SDR_1BIT		1
#define SDR_2BIT		2
#define SDR_4BIT		3
#define DDR_1BIT		5
#define DDR_2BIT		6
#define DDR_4BIT		7
#define DMA_DESC_SINGLE_SPI	1
#define DMA_DESC_DUAL_SPI	2
#define DMA_DESC_QUAD_SPI	3

#define PIO_XFER_STATUS		0x001c
#define WR_FIFO_BYTES_MSK	0xffff0000
#define WR_FIFO_BYTES_SHFT	16

#define PIO_DATAOUT_1B		0x0020
#define PIO_DATAOUT_4B		0x0024

#define RD_FIFO_CFG		0x0028
#define CONTINUOUS_MODE		BIT(0)

#define RD_FIFO_STATUS	0x002c
#define FIFO_EMPTY	BIT(11)
#define WR_CNTS_MSK	0x7f0
#define WR_CNTS_SHFT	4
#define RDY_64BYTE	BIT(3)
#define RDY_32BYTE	BIT(2)
#define RDY_16BYTE	BIT(1)
#define FIFO_RDY	BIT(0)

#define RD_FIFO_RESET		0x0030
#define RESET_FIFO		BIT(0)

#define NEXT_DMA_DESC_ADDR	0x0040
#define CURRENT_DMA_DESC_ADDR	0x0044
#define CURRENT_MEM_ADDR	0x0048

#define CUR_MEM_ADDR		0x0048
#define HW_VERSION		0x004c
#define RD_FIFO			0x0050
#define SAMPLING_CLK_CFG	0x0090
#define SAMPLING_CLK_STATUS	0x0094

#define QSPI_ALIGN_REQ	32

enum qspi_dir {
	QSPI_READ,
	QSPI_WRITE,
};

struct qspi_cmd_desc {
	u32 data_address;
	u32 next_descriptor;
	u32 direction:1;
	u32 multi_io_mode:3;
	u32 reserved1:4;
	u32 fragment:1;
	u32 reserved2:7;
	u32 length:16;
};

struct qspi_xfer {
	union {
		const void *tx_buf;
		void *rx_buf;
	};
	unsigned int rem_bytes;
	unsigned int buswidth;
	enum qspi_dir dir;
	bool is_last;
};

enum qspi_clocks {
	QSPI_CLK_CORE,
	QSPI_CLK_IFACE,
	QSPI_NUM_CLKS
};

/*
 * Number of entries in sgt returned from spi framework that-
 * will be supported. Can be modified as required.
 * In practice, given max_dma_len is 64KB, the number of
 * entries is not expected to exceed 1.
 */
#define QSPI_MAX_SG 5

struct qcom_qspi {
	void __iomem *base;
	struct device *dev;
	struct clk_bulk_data *clks;
	struct qspi_xfer xfer;
	struct dma_pool *dma_cmd_pool;
	dma_addr_t dma_cmd_desc[QSPI_MAX_SG];
	void *virt_cmd_desc[QSPI_MAX_SG];
	unsigned int n_cmd_desc;
	struct icc_path *icc_path_cpu_to_qspi;
	unsigned long last_speed;
	/* Lock to protect data accessed by IRQs */
	spinlock_t lock;
};

static u32 qspi_buswidth_to_iomode(struct qcom_qspi *ctrl,
				   unsigned int buswidth)
{
	switch (buswidth) {
	case 1:
		return SDR_1BIT;
	case 2:
		return SDR_2BIT;
	case 4:
		return SDR_4BIT;
	default:
		dev_warn_once(ctrl->dev,
				"Unexpected bus width: %u\n", buswidth);
		return SDR_1BIT;
	}
}

static void qcom_qspi_pio_xfer_cfg(struct qcom_qspi *ctrl)
{
	u32 pio_xfer_cfg;
	u32 iomode;
	const struct qspi_xfer *xfer;

	xfer = &ctrl->xfer;
	pio_xfer_cfg = readl(ctrl->base + PIO_XFER_CFG);
	pio_xfer_cfg &= ~TRANSFER_DIRECTION;
	pio_xfer_cfg |= xfer->dir;
	if (xfer->is_last)
		pio_xfer_cfg &= ~TRANSFER_FRAGMENT;
	else
		pio_xfer_cfg |= TRANSFER_FRAGMENT;
	pio_xfer_cfg &= ~MULTI_IO_MODE_MSK;
	iomode = qspi_buswidth_to_iomode(ctrl, xfer->buswidth);
	pio_xfer_cfg |= iomode << MULTI_IO_MODE_SHFT;

	writel(pio_xfer_cfg, ctrl->base + PIO_XFER_CFG);
}

static void qcom_qspi_pio_xfer_ctrl(struct qcom_qspi *ctrl)
{
	u32 pio_xfer_ctrl;

	pio_xfer_ctrl = readl(ctrl->base + PIO_XFER_CTRL);
	pio_xfer_ctrl &= ~REQUEST_COUNT_MSK;
	pio_xfer_ctrl |= ctrl->xfer.rem_bytes;
	writel(pio_xfer_ctrl, ctrl->base + PIO_XFER_CTRL);
}

static void qcom_qspi_pio_xfer(struct qcom_qspi *ctrl)
{
	u32 ints;

	qcom_qspi_pio_xfer_cfg(ctrl);

	/* Ack any previous interrupts that might be hanging around */
	writel(QSPI_ALL_IRQS, ctrl->base + MSTR_INT_STATUS);

	/* Setup new interrupts */
	if (ctrl->xfer.dir == QSPI_WRITE)
		ints = QSPI_ERR_IRQS | WR_FIFO_EMPTY;
	else
		ints = QSPI_ERR_IRQS | RESP_FIFO_RDY;
	writel(ints, ctrl->base + MSTR_INT_EN);

	/* Kick off the transfer */
	qcom_qspi_pio_xfer_ctrl(ctrl);
}

static void qcom_qspi_handle_err(struct spi_master *master,
				 struct spi_message *msg)
{
	u32 int_status;
	struct qcom_qspi *ctrl = spi_master_get_devdata(master);
	unsigned long flags;
	int i;

	spin_lock_irqsave(&ctrl->lock, flags);
	writel(0, ctrl->base + MSTR_INT_EN);
	int_status = readl(ctrl->base + MSTR_INT_STATUS);
	writel(int_status, ctrl->base + MSTR_INT_STATUS);
	ctrl->xfer.rem_bytes = 0;

	/* free cmd descriptors if they are around (DMA mode) */
	for (i = 0; i < ctrl->n_cmd_desc; i++)
		dma_pool_free(ctrl->dma_cmd_pool, ctrl->virt_cmd_desc[i],
				  ctrl->dma_cmd_desc[i]);
	ctrl->n_cmd_desc = 0;
	spin_unlock_irqrestore(&ctrl->lock, flags);
}

static int qcom_qspi_set_speed(struct qcom_qspi *ctrl, unsigned long speed_hz)
{
	int ret;
	unsigned int avg_bw_cpu;

	if (speed_hz == ctrl->last_speed)
		return 0;

	/* In regular operation (SBL_EN=1) core must be 4x transfer clock */
	ret = dev_pm_opp_set_rate(ctrl->dev, speed_hz * 4);
	if (ret) {
		dev_err(ctrl->dev, "Failed to set core clk %d\n", ret);
		return ret;
	}

	/*
	 * Set BW quota for CPU.
	 * We don't have explicit peak requirement so keep it equal to avg_bw.
	 */
	avg_bw_cpu = Bps_to_icc(speed_hz);
	ret = icc_set_bw(ctrl->icc_path_cpu_to_qspi, avg_bw_cpu, avg_bw_cpu);
	if (ret) {
		dev_err(ctrl->dev, "%s: ICC BW voting failed for cpu: %d\n",
			__func__, ret);
		return ret;
	}

	ctrl->last_speed = speed_hz;

	return 0;
}

static int qcom_qspi_alloc_desc(struct qcom_qspi *ctrl, dma_addr_t dma_ptr,
			uint32_t n_bytes)
{
	struct qspi_cmd_desc *virt_cmd_desc, *prev;
	dma_addr_t dma_cmd_desc;

	/* allocate for dma cmd descriptor */
	virt_cmd_desc = dma_pool_alloc(ctrl->dma_cmd_pool, GFP_KERNEL | __GFP_ZERO, &dma_cmd_desc);
	if (!virt_cmd_desc)
		return -ENOMEM;

	ctrl->virt_cmd_desc[ctrl->n_cmd_desc] = virt_cmd_desc;
	ctrl->dma_cmd_desc[ctrl->n_cmd_desc] = dma_cmd_desc;
	ctrl->n_cmd_desc++;

	/* setup cmd descriptor */
	virt_cmd_desc->data_address = dma_ptr;
	virt_cmd_desc->direction = ctrl->xfer.dir;
	virt_cmd_desc->multi_io_mode = qspi_buswidth_to_iomode(ctrl, ctrl->xfer.buswidth);
	virt_cmd_desc->fragment = !ctrl->xfer.is_last;
	virt_cmd_desc->length = n_bytes;

	/* update previous descriptor */
	if (ctrl->n_cmd_desc >= 2) {
		prev = (ctrl->virt_cmd_desc)[ctrl->n_cmd_desc - 2];
		prev->next_descriptor = dma_cmd_desc;
		prev->fragment = 1;
	}

	return 0;
}

static int qcom_qspi_setup_dma_desc(struct qcom_qspi *ctrl,
				struct spi_transfer *xfer)
{
	int ret;
	struct sg_table *sgt;
	dma_addr_t dma_ptr_sg;
	unsigned int dma_len_sg;
	int i;

	if (ctrl->n_cmd_desc) {
		dev_err(ctrl->dev, "Remnant dma buffers n_cmd_desc-%d\n", ctrl->n_cmd_desc);
		return -EIO;
	}

	sgt = (ctrl->xfer.dir == QSPI_READ) ? &xfer->rx_sg : &xfer->tx_sg;
	if (!sgt->nents || sgt->nents > QSPI_MAX_SG) {
		dev_warn_once(ctrl->dev, "Cannot handle %d entries in scatter list\n", sgt->nents);
		return -EAGAIN;
	}

	for (i = 0; i < sgt->nents; i++) {
		dma_ptr_sg = sg_dma_address(sgt->sgl + i);
		if (!IS_ALIGNED(dma_ptr_sg, QSPI_ALIGN_REQ)) {
			dev_warn_once(ctrl->dev, "dma_address not aligned to %d\n", QSPI_ALIGN_REQ);
			return -EAGAIN;
		}
	}

	for (i = 0; i < sgt->nents; i++) {
		dma_ptr_sg = sg_dma_address(sgt->sgl + i);
		dma_len_sg = sg_dma_len(sgt->sgl + i);

		ret = qcom_qspi_alloc_desc(ctrl, dma_ptr_sg, dma_len_sg);
		if (ret)
			goto cleanup;
	}
	return 0;

cleanup:
	for (i = 0; i < ctrl->n_cmd_desc; i++)
		dma_pool_free(ctrl->dma_cmd_pool, ctrl->virt_cmd_desc[i],
				  ctrl->dma_cmd_desc[i]);
	ctrl->n_cmd_desc = 0;
	return ret;
}

static void qcom_qspi_dma_xfer(struct qcom_qspi *ctrl)
{
	/* Setup new interrupts */
	writel(DMA_CHAIN_DONE, ctrl->base + MSTR_INT_EN);

	/* kick off transfer */
	writel((u32)((ctrl->dma_cmd_desc)[0]), ctrl->base + NEXT_DMA_DESC_ADDR);
}

/* Switch to DMA if transfer length exceeds this */
#define QSPI_MAX_BYTES_FIFO 64

static bool qcom_qspi_can_dma(struct spi_controller *ctlr,
			 struct spi_device *slv, struct spi_transfer *xfer)
{
	return xfer->len > QSPI_MAX_BYTES_FIFO;
}

static int qcom_qspi_transfer_one(struct spi_master *master,
				  struct spi_device *slv,
				  struct spi_transfer *xfer)
{
	struct qcom_qspi *ctrl = spi_master_get_devdata(master);
	int ret;
	unsigned long speed_hz;
	unsigned long flags;
	u32 mstr_cfg;

	speed_hz = slv->max_speed_hz;
	if (xfer->speed_hz)
		speed_hz = xfer->speed_hz;

	ret = qcom_qspi_set_speed(ctrl, speed_hz);
	if (ret)
		return ret;

	spin_lock_irqsave(&ctrl->lock, flags);
	mstr_cfg = readl(ctrl->base + MSTR_CONFIG);

	/* We are half duplex, so either rx or tx will be set */
	if (xfer->rx_buf) {
		ctrl->xfer.dir = QSPI_READ;
		ctrl->xfer.buswidth = xfer->rx_nbits;
		ctrl->xfer.rx_buf = xfer->rx_buf;
	} else {
		ctrl->xfer.dir = QSPI_WRITE;
		ctrl->xfer.buswidth = xfer->tx_nbits;
		ctrl->xfer.tx_buf = xfer->tx_buf;
	}
	ctrl->xfer.is_last = list_is_last(&xfer->transfer_list,
					  &master->cur_msg->transfers);
	ctrl->xfer.rem_bytes = xfer->len;

	if (xfer->rx_sg.nents || xfer->tx_sg.nents) {
		/* do DMA transfer */
		if (!(mstr_cfg & DMA_ENABLE)) {
			mstr_cfg |= DMA_ENABLE;
			writel(mstr_cfg, ctrl->base + MSTR_CONFIG);
		}

		ret = qcom_qspi_setup_dma_desc(ctrl, xfer);
		if (ret != -EAGAIN) {
			if (!ret)
				qcom_qspi_dma_xfer(ctrl);
			goto exit;
		}
		dev_warn_once(ctrl->dev, "DMA failure, falling back to PIO");
		ret = 0; /* We'll retry w/ PIO */
	}

	if (mstr_cfg & DMA_ENABLE) {
		mstr_cfg &= ~DMA_ENABLE;
		writel(mstr_cfg, ctrl->base + MSTR_CONFIG);
	}
	qcom_qspi_pio_xfer(ctrl);

exit:
	spin_unlock_irqrestore(&ctrl->lock, flags);

	if (ret)
		return ret;

	/* We'll call spi_finalize_current_transfer() when done */
	return 1;
}

static int qcom_qspi_prepare_message(struct spi_master *master,
				     struct spi_message *message)
{
	u32 mstr_cfg;
	struct qcom_qspi *ctrl;
	int tx_data_oe_delay = 1;
	int tx_data_delay = 1;
	unsigned long flags;

	ctrl = spi_master_get_devdata(master);
	spin_lock_irqsave(&ctrl->lock, flags);

	mstr_cfg = readl(ctrl->base + MSTR_CONFIG);
	mstr_cfg &= ~CHIP_SELECT_NUM;
	if (spi_get_chipselect(message->spi, 0))
		mstr_cfg |= CHIP_SELECT_NUM;

	mstr_cfg |= FB_CLK_EN | PIN_WPN | PIN_HOLDN | SBL_EN | FULL_CYCLE_MODE;
	mstr_cfg &= ~(SPI_MODE_MSK | TX_DATA_OE_DELAY_MSK | TX_DATA_DELAY_MSK);
	mstr_cfg |= message->spi->mode << SPI_MODE_SHFT;
	mstr_cfg |= tx_data_oe_delay << TX_DATA_OE_DELAY_SHFT;
	mstr_cfg |= tx_data_delay << TX_DATA_DELAY_SHFT;
	mstr_cfg &= ~DMA_ENABLE;

	writel(mstr_cfg, ctrl->base + MSTR_CONFIG);
	spin_unlock_irqrestore(&ctrl->lock, flags);

	return 0;
}

static int qcom_qspi_alloc_dma(struct qcom_qspi *ctrl)
{
	ctrl->dma_cmd_pool = dmam_pool_create("qspi cmd desc pool",
		ctrl->dev, sizeof(struct qspi_cmd_desc), 0, 0);
	if (!ctrl->dma_cmd_pool)
		return -ENOMEM;

	return 0;
}

static irqreturn_t pio_read(struct qcom_qspi *ctrl)
{
	u32 rd_fifo_status;
	u32 rd_fifo;
	unsigned int wr_cnts;
	unsigned int bytes_to_read;
	unsigned int words_to_read;
	u32 *word_buf;
	u8 *byte_buf;
	int i;

	rd_fifo_status = readl(ctrl->base + RD_FIFO_STATUS);

	if (!(rd_fifo_status & FIFO_RDY)) {
		dev_dbg(ctrl->dev, "Spurious IRQ %#x\n", rd_fifo_status);
		return IRQ_NONE;
	}

	wr_cnts = (rd_fifo_status & WR_CNTS_MSK) >> WR_CNTS_SHFT;
	wr_cnts = min(wr_cnts, ctrl->xfer.rem_bytes);

	words_to_read = wr_cnts / QSPI_BYTES_PER_WORD;
	bytes_to_read = wr_cnts % QSPI_BYTES_PER_WORD;

	if (words_to_read) {
		word_buf = ctrl->xfer.rx_buf;
		ctrl->xfer.rem_bytes -= words_to_read * QSPI_BYTES_PER_WORD;
		ioread32_rep(ctrl->base + RD_FIFO, word_buf, words_to_read);
		ctrl->xfer.rx_buf = word_buf + words_to_read;
	}

	if (bytes_to_read) {
		byte_buf = ctrl->xfer.rx_buf;
		rd_fifo = readl(ctrl->base + RD_FIFO);
		ctrl->xfer.rem_bytes -= bytes_to_read;
		for (i = 0; i < bytes_to_read; i++)
			*byte_buf++ = rd_fifo >> (i * BITS_PER_BYTE);
		ctrl->xfer.rx_buf = byte_buf;
	}

	return IRQ_HANDLED;
}

static irqreturn_t pio_write(struct qcom_qspi *ctrl)
{
	const void *xfer_buf = ctrl->xfer.tx_buf;
	const int *word_buf;
	const char *byte_buf;
	unsigned int wr_fifo_bytes;
	unsigned int wr_fifo_words;
	unsigned int wr_size;
	unsigned int rem_words;

	wr_fifo_bytes = readl(ctrl->base + PIO_XFER_STATUS);
	wr_fifo_bytes >>= WR_FIFO_BYTES_SHFT;

	if (ctrl->xfer.rem_bytes < QSPI_BYTES_PER_WORD) {
		/* Process the last 1-3 bytes */
		wr_size = min(wr_fifo_bytes, ctrl->xfer.rem_bytes);
		ctrl->xfer.rem_bytes -= wr_size;

		byte_buf = xfer_buf;
		while (wr_size--)
			writel(*byte_buf++,
			       ctrl->base + PIO_DATAOUT_1B);
		ctrl->xfer.tx_buf = byte_buf;
	} else {
		/*
		 * Process all the whole words; to keep things simple we'll
		 * just wait for the next interrupt to handle the last 1-3
		 * bytes if we don't have an even number of words.
		 */
		rem_words = ctrl->xfer.rem_bytes / QSPI_BYTES_PER_WORD;
		wr_fifo_words = wr_fifo_bytes / QSPI_BYTES_PER_WORD;

		wr_size = min(rem_words, wr_fifo_words);
		ctrl->xfer.rem_bytes -= wr_size * QSPI_BYTES_PER_WORD;

		word_buf = xfer_buf;
		iowrite32_rep(ctrl->base + PIO_DATAOUT_4B, word_buf, wr_size);
		ctrl->xfer.tx_buf = word_buf + wr_size;

	}

	return IRQ_HANDLED;
}

static irqreturn_t qcom_qspi_irq(int irq, void *dev_id)
{
	u32 int_status;
	struct qcom_qspi *ctrl = dev_id;
	irqreturn_t ret = IRQ_NONE;

	spin_lock(&ctrl->lock);

	int_status = readl(ctrl->base + MSTR_INT_STATUS);
	writel(int_status, ctrl->base + MSTR_INT_STATUS);

	/* PIO mode handling */
	if (ctrl->xfer.dir == QSPI_WRITE) {
		if (int_status & WR_FIFO_EMPTY)
			ret = pio_write(ctrl);
	} else {
		if (int_status & RESP_FIFO_RDY)
			ret = pio_read(ctrl);
	}

	if (int_status & QSPI_ERR_IRQS) {
		if (int_status & RESP_FIFO_UNDERRUN)
			dev_err(ctrl->dev, "IRQ error: FIFO underrun\n");
		if (int_status & WR_FIFO_OVERRUN)
			dev_err(ctrl->dev, "IRQ error: FIFO overrun\n");
		if (int_status & HRESP_FROM_NOC_ERR)
			dev_err(ctrl->dev, "IRQ error: NOC response error\n");
		ret = IRQ_HANDLED;
	}

	if (!ctrl->xfer.rem_bytes) {
		writel(0, ctrl->base + MSTR_INT_EN);
		spi_finalize_current_transfer(dev_get_drvdata(ctrl->dev));
	}

	/* DMA mode handling */
	if (int_status & DMA_CHAIN_DONE) {
		int i;

		writel(0, ctrl->base + MSTR_INT_EN);
		ctrl->xfer.rem_bytes = 0;

		for (i = 0; i < ctrl->n_cmd_desc; i++)
			dma_pool_free(ctrl->dma_cmd_pool, ctrl->virt_cmd_desc[i],
					  ctrl->dma_cmd_desc[i]);
		ctrl->n_cmd_desc = 0;

		ret = IRQ_HANDLED;
		spi_finalize_current_transfer(dev_get_drvdata(ctrl->dev));
	}

	spin_unlock(&ctrl->lock);
	return ret;
}

static int qcom_qspi_probe(struct platform_device *pdev)
{
	int ret;
	struct device *dev;
	struct spi_master *master;
	struct qcom_qspi *ctrl;

	dev = &pdev->dev;

	master = devm_spi_alloc_master(dev, sizeof(*ctrl));
	if (!master)
		return -ENOMEM;

	platform_set_drvdata(pdev, master);

	ctrl = spi_master_get_devdata(master);

	spin_lock_init(&ctrl->lock);
	ctrl->dev = dev;
	ctrl->base = devm_platform_ioremap_resource(pdev, 0);
	if (IS_ERR(ctrl->base))
		return PTR_ERR(ctrl->base);

	ctrl->clks = devm_kcalloc(dev, QSPI_NUM_CLKS,
				  sizeof(*ctrl->clks), GFP_KERNEL);
	if (!ctrl->clks)
		return -ENOMEM;

	ctrl->clks[QSPI_CLK_CORE].id = "core";
	ctrl->clks[QSPI_CLK_IFACE].id = "iface";
	ret = devm_clk_bulk_get(dev, QSPI_NUM_CLKS, ctrl->clks);
	if (ret)
		return ret;

	ctrl->icc_path_cpu_to_qspi = devm_of_icc_get(dev, "qspi-config");
	if (IS_ERR(ctrl->icc_path_cpu_to_qspi))
		return dev_err_probe(dev, PTR_ERR(ctrl->icc_path_cpu_to_qspi),
				     "Failed to get cpu path\n");

	/* Set BW vote for register access */
	ret = icc_set_bw(ctrl->icc_path_cpu_to_qspi, Bps_to_icc(1000),
				Bps_to_icc(1000));
	if (ret) {
		dev_err(ctrl->dev, "%s: ICC BW voting failed for cpu: %d\n",
				__func__, ret);
		return ret;
	}

	ret = icc_disable(ctrl->icc_path_cpu_to_qspi);
	if (ret) {
		dev_err(ctrl->dev, "%s: ICC disable failed for cpu: %d\n",
				__func__, ret);
		return ret;
	}

	ret = platform_get_irq(pdev, 0);
	if (ret < 0)
		return ret;
	ret = devm_request_irq(dev, ret, qcom_qspi_irq, 0, dev_name(dev), ctrl);
	if (ret) {
		dev_err(dev, "Failed to request irq %d\n", ret);
		return ret;
	}

	ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(32));
	if (ret)
		return dev_err_probe(dev, ret, "could not set DMA mask\n");

	master->max_speed_hz = 300000000;
	master->max_dma_len = 65536; /* as per HPG */
	master->dma_alignment = QSPI_ALIGN_REQ;
	master->num_chipselect = QSPI_NUM_CS;
	master->bus_num = -1;
	master->dev.of_node = pdev->dev.of_node;
	master->mode_bits = SPI_MODE_0 |
			    SPI_TX_DUAL | SPI_RX_DUAL |
			    SPI_TX_QUAD | SPI_RX_QUAD;
	master->flags = SPI_MASTER_HALF_DUPLEX;
	master->prepare_message = qcom_qspi_prepare_message;
	master->transfer_one = qcom_qspi_transfer_one;
	master->handle_err = qcom_qspi_handle_err;
	if (of_property_read_bool(pdev->dev.of_node, "iommus"))
		master->can_dma = qcom_qspi_can_dma;
	master->auto_runtime_pm = true;

	ret = devm_pm_opp_set_clkname(&pdev->dev, "core");
	if (ret)
		return ret;
	/* OPP table is optional */
	ret = devm_pm_opp_of_add_table(&pdev->dev);
	if (ret && ret != -ENODEV) {
		dev_err(&pdev->dev, "invalid OPP table in device tree\n");
		return ret;
	}

	ret = qcom_qspi_alloc_dma(ctrl);
	if (ret)
		return ret;

	pm_runtime_use_autosuspend(dev);
	pm_runtime_set_autosuspend_delay(dev, 250);
	pm_runtime_enable(dev);

	ret = spi_register_master(master);
	if (!ret)
		return 0;

	pm_runtime_disable(dev);

	return ret;
}

static void qcom_qspi_remove(struct platform_device *pdev)
{
	struct spi_master *master = platform_get_drvdata(pdev);

	/* Unregister _before_ disabling pm_runtime() so we stop transfers */
	spi_unregister_master(master);

	pm_runtime_disable(&pdev->dev);
}

static int __maybe_unused qcom_qspi_runtime_suspend(struct device *dev)
{
	struct spi_master *master = dev_get_drvdata(dev);
	struct qcom_qspi *ctrl = spi_master_get_devdata(master);
	int ret;

	/* Drop the performance state vote */
	dev_pm_opp_set_rate(dev, 0);
	clk_bulk_disable_unprepare(QSPI_NUM_CLKS, ctrl->clks);

	ret = icc_disable(ctrl->icc_path_cpu_to_qspi);
	if (ret) {
		dev_err_ratelimited(ctrl->dev, "%s: ICC disable failed for cpu: %d\n",
			__func__, ret);
		return ret;
	}

	pinctrl_pm_select_sleep_state(dev);

	return 0;
}

static int __maybe_unused qcom_qspi_runtime_resume(struct device *dev)
{
	struct spi_master *master = dev_get_drvdata(dev);
	struct qcom_qspi *ctrl = spi_master_get_devdata(master);
	int ret;

	pinctrl_pm_select_default_state(dev);

	ret = icc_enable(ctrl->icc_path_cpu_to_qspi);
	if (ret) {
		dev_err_ratelimited(ctrl->dev, "%s: ICC enable failed for cpu: %d\n",
			__func__, ret);
		return ret;
	}

	ret = clk_bulk_prepare_enable(QSPI_NUM_CLKS, ctrl->clks);
	if (ret)
		return ret;

	return dev_pm_opp_set_rate(dev, ctrl->last_speed * 4);
}

static int __maybe_unused qcom_qspi_suspend(struct device *dev)
{
	struct spi_master *master = dev_get_drvdata(dev);
	int ret;

	ret = spi_master_suspend(master);
	if (ret)
		return ret;

	ret = pm_runtime_force_suspend(dev);
	if (ret)
		spi_master_resume(master);

	return ret;
}

static int __maybe_unused qcom_qspi_resume(struct device *dev)
{
	struct spi_master *master = dev_get_drvdata(dev);
	int ret;

	ret = pm_runtime_force_resume(dev);
	if (ret)
		return ret;

	ret = spi_master_resume(master);
	if (ret)
		pm_runtime_force_suspend(dev);

	return ret;
}

static const struct dev_pm_ops qcom_qspi_dev_pm_ops = {
	SET_RUNTIME_PM_OPS(qcom_qspi_runtime_suspend,
			   qcom_qspi_runtime_resume, NULL)
	SET_SYSTEM_SLEEP_PM_OPS(qcom_qspi_suspend, qcom_qspi_resume)
};

static const struct of_device_id qcom_qspi_dt_match[] = {
	{ .compatible = "qcom,qspi-v1", },
	{ }
};
MODULE_DEVICE_TABLE(of, qcom_qspi_dt_match);

static struct platform_driver qcom_qspi_driver = {
	.driver = {
		.name		= "qcom_qspi",
		.pm		= &qcom_qspi_dev_pm_ops,
		.of_match_table = qcom_qspi_dt_match,
	},
	.probe = qcom_qspi_probe,
	.remove_new = qcom_qspi_remove,
};
module_platform_driver(qcom_qspi_driver);

MODULE_DESCRIPTION("SPI driver for QSPI cores");
MODULE_LICENSE("GPL v2");