From c64e38ea17a81721da0393584fd807f8434050fa Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Mon, 1 Nov 2010 14:32:27 -0400 Subject: xen/blkfront: map REQ_FLUSH into a full barrier Implement a flush as a full barrier, since we have nothing weaker. Signed-off-by: Jeremy Fitzhardinge Acked-by: Christoph Hellwig --- drivers/block/xen-blkfront.c | 19 +++++-------------- 1 file changed, 5 insertions(+), 14 deletions(-) diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 06e2812ba124..3a318d8576c5 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -245,14 +245,11 @@ static int blkif_ioctl(struct block_device *bdev, fmode_t mode, } /* - * blkif_queue_request + * Generate a Xen blkfront IO request from a blk layer request. Reads + * and writes are handled as expected. Since we lack a loose flush + * request, we map flushes into a full ordered barrier. * - * request block io - * - * id: for guest use only. - * operation: BLKIF_OP_{READ,WRITE,PROBE} - * buffer: buffer to read/write into. this should be a - * virtual address in the guest os. + * @req: a request struct */ static int blkif_queue_request(struct request *req) { @@ -289,7 +286,7 @@ static int blkif_queue_request(struct request *req) ring_req->operation = rq_data_dir(req) ? BLKIF_OP_WRITE : BLKIF_OP_READ; - if (req->cmd_flags & REQ_HARDBARRIER) + if (req->cmd_flags & REQ_FLUSH) ring_req->operation = BLKIF_OP_WRITE_BARRIER; ring_req->nr_segments = blk_rq_map_sg(req->q, req, info->sg); @@ -1069,14 +1066,8 @@ static void blkfront_connect(struct blkfront_info *info) */ info->feature_flush = 0; - /* - * The driver doesn't properly handled empty flushes, so - * lets disable barrier support for now. - */ -#if 0 if (!err && barrier) info->feature_flush = REQ_FLUSH; -#endif err = xlvbd_alloc_gendisk(sectors, info, binfo, sector_size); if (err) { -- cgit v1.2.3 From a945b9801a9bfd4a98bcfd9f6656b5027b254e3f Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Mon, 1 Nov 2010 17:03:14 -0400 Subject: xen/blkfront: change blk_shadow.request to proper pointer Signed-off-by: Jeremy Fitzhardinge --- drivers/block/xen-blkfront.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 3a318d8576c5..31c8a643d109 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -65,7 +65,7 @@ enum blkif_state { struct blk_shadow { struct blkif_request req; - unsigned long request; + struct request *request; unsigned long frame[BLKIF_MAX_SEGMENTS_PER_REQUEST]; }; @@ -136,7 +136,7 @@ static void add_id_to_freelist(struct blkfront_info *info, unsigned long id) { info->shadow[id].req.id = info->shadow_free; - info->shadow[id].request = 0; + info->shadow[id].request = NULL; info->shadow_free = id; } @@ -278,7 +278,7 @@ static int blkif_queue_request(struct request *req) /* Fill out a communications ring structure. 
*/ ring_req = RING_GET_REQUEST(&info->ring, info->ring.req_prod_pvt); id = get_id_from_freelist(info); - info->shadow[id].request = (unsigned long)req; + info->shadow[id].request = req; ring_req->id = id; ring_req->sector_number = (blkif_sector_t)blk_rq_pos(req); @@ -633,7 +633,7 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id) bret = RING_GET_RESPONSE(&info->ring, i); id = bret->id; - req = (struct request *)info->shadow[id].request; + req = info->shadow[id].request; blkif_completion(&info->shadow[id]); @@ -898,7 +898,7 @@ static int blkif_recover(struct blkfront_info *info) /* Stage 3: Find pending requests and requeue them. */ for (i = 0; i < BLK_RING_SIZE; i++) { /* Not in use? */ - if (copy[i].request == 0) + if (!copy[i].request) continue; /* Grab a request slot and copy shadow state into it. */ @@ -915,9 +915,7 @@ static int blkif_recover(struct blkfront_info *info) req->seg[j].gref, info->xbdev->otherend_id, pfn_to_mfn(info->shadow[req->id].frame[j]), - rq_data_dir( - (struct request *) - info->shadow[req->id].request)); + rq_data_dir(info->shadow[req->id].request)); info->shadow[req->id].req = *req; info->ring.req_prod_pvt++; -- cgit v1.2.3 From be2f8373c188ed1f5d36003c9928e4d695213080 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Tue, 2 Nov 2010 10:38:33 -0400 Subject: xen/blkfront: Implement FUA with BLKIF_OP_WRITE_BARRIER The BLKIF_OP_WRITE_BARRIER is a full ordered barrier, so we can use it to implement FUA as well as a plain FLUSH. Signed-off-by: Jeremy Fitzhardinge Acked-by: Christoph Hellwig --- drivers/block/xen-blkfront.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 31c8a643d109..76b874a79175 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -286,8 +286,18 @@ static int blkif_queue_request(struct request *req) ring_req->operation = rq_data_dir(req) ? BLKIF_OP_WRITE : BLKIF_OP_READ; - if (req->cmd_flags & REQ_FLUSH) + + if (req->cmd_flags & (REQ_FLUSH | REQ_FUA)) { + /* + * Ideally we could just do an unordered + * flush-to-disk, but all we have is a full write + * barrier at the moment. However, a barrier write is + * a superset of FUA, so we can implement it the same + * way. (It's also a FLUSH+FUA, since it is + * guaranteed ordered WRT previous writes.) 
+ */ ring_req->operation = BLKIF_OP_WRITE_BARRIER; + } ring_req->nr_segments = blk_rq_map_sg(req->q, req, info->sg); BUG_ON(ring_req->nr_segments > BLKIF_MAX_SEGMENTS_PER_REQUEST); @@ -1065,7 +1075,7 @@ static void blkfront_connect(struct blkfront_info *info) info->feature_flush = 0; if (!err && barrier) - info->feature_flush = REQ_FLUSH; + info->feature_flush = REQ_FLUSH | REQ_FUA; err = xlvbd_alloc_gendisk(sectors, info, binfo, sector_size); if (err) { -- cgit v1.2.3 From 239b0b441449b2c70492880e6c6a4a885afa74ba Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Tue, 2 Nov 2010 13:15:53 -0400 Subject: MAINTAINERS: add drivers/char/hvc_tile.c as maintained by tile Signed-off-by: Chris Metcalf --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 0094224ca79b..2525b04f2e25 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5827,6 +5827,7 @@ M: Chris Metcalf W: http://www.tilera.com/scm/ S: Supported F: arch/tile/ +F: drivers/char/hvc_tile.c TLAN NETWORK DRIVER M: Samuel Chessman -- cgit v1.2.3 From dcb8baeceaa1c629bbd06f472cea023ad08a0c33 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Tue, 2 Nov 2010 11:55:58 -0400 Subject: xen/blkfront: cope with backends that fail empty BLKIF_OP_WRITE_BARRIER requests Some(?) Xen block backends fail empty BLKIF_OP_WRITE_BARRIER requests, which Linux uses as a cache flush operation. In that case, disable use of FLUSH. Signed-off-by: Jeremy Fitzhardinge Cc: Daniel Stodden --- drivers/block/xen-blkfront.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 76b874a79175..4f9e22f29138 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -656,6 +656,16 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id) printk(KERN_WARNING "blkfront: %s: write barrier op failed\n", info->gd->disk_name); error = -EOPNOTSUPP; + } + if (unlikely(bret->status == BLKIF_RSP_ERROR && + info->shadow[id].req.nr_segments == 0)) { + printk(KERN_WARNING "blkfront: %s: empty write barrier op failed\n", + info->gd->disk_name); + error = -EOPNOTSUPP; + } + if (unlikely(error)) { + if (error == -EOPNOTSUPP) + error = 0; info->feature_flush = 0; xlvbd_flush(info); } -- cgit v1.2.3 From 62d69e6a57c5b5d52a607b1ce73ff6af65c992eb Mon Sep 17 00:00:00 2001 From: Enric Balletbo i Serra Date: Sat, 23 Oct 2010 16:48:58 +0000 Subject: omap3: IGEP v2: Remove onenand_setup no-op function Set onenand_setup to NULL instead of adding a no-op function.
Signed-off-by: Enric Balletbo i Serra Signed-off-by: Tony Lindgren --- arch/arm/mach-omap2/board-igep0020.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/arch/arm/mach-omap2/board-igep0020.c b/arch/arm/mach-omap2/board-igep0020.c index 5e035a58b809..20b199fa5a60 100644 --- a/arch/arm/mach-omap2/board-igep0020.c +++ b/arch/arm/mach-omap2/board-igep0020.c @@ -136,16 +136,9 @@ static struct mtd_partition igep2_onenand_partitions[] = { }, }; -static int igep2_onenand_setup(void __iomem *onenand_base, int freq) -{ - /* nothing is required to be setup for onenand as of now */ - return 0; -} - static struct omap_onenand_platform_data igep2_onenand_data = { .parts = igep2_onenand_partitions, .nr_parts = ARRAY_SIZE(igep2_onenand_partitions), - .onenand_setup = igep2_onenand_setup, .dma_channel = -1, /* disable DMA in OMAP OneNAND driver */ }; -- cgit v1.2.3 From 25d5c699f983a2da51f5165eb9a8fc6338124b6c Mon Sep 17 00:00:00 2001 From: Philip Rakity Date: Sun, 7 Nov 2010 16:22:28 -0500 Subject: mmc: Fix printing of card DDR type We should not call mmc_card_set_ddr_mode() if we are in single data mode. This sets DDR and causes the kernel log to say the card is DDR when it is not. Explicitly set ddr to 0 rather than rely on MMC_SDR_MODE being 0 when doing the checks. Signed-off-by: Philip Rakity Acked-by: Linus Walleij Acked-by: Kyungmin Park Signed-off-by: Chris Ball --- drivers/mmc/core/mmc.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c index 995261f7fd70..77f93c3b8808 100644 --- a/drivers/mmc/core/mmc.c +++ b/drivers/mmc/core/mmc.c @@ -375,7 +375,7 @@ static int mmc_init_card(struct mmc_host *host, u32 ocr, struct mmc_card *oldcard) { struct mmc_card *card; - int err, ddr = MMC_SDR_MODE; + int err, ddr = 0; u32 cid[4]; unsigned int max_dtr; @@ -562,7 +562,11 @@ static int mmc_init_card(struct mmc_host *host, u32 ocr, 1 << bus_width, ddr); err = 0; } else { - mmc_card_set_ddr_mode(card); + if (ddr) + mmc_card_set_ddr_mode(card); + else + ddr = MMC_SDR_MODE; + mmc_set_bus_width_ddr(card->host, bus_width, ddr); } } -- cgit v1.2.3 From 14d4031d21d8a63ad84e5ab9198d0503efabc780 Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Thu, 4 Nov 2010 13:59:11 +0800 Subject: mmc: ushc: Return proper error code for ushc_probe() Improves error handling in the ushc driver.
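As an aside, the pattern being applied is worth spelling out. A minimal sketch with made-up names (not the ushc code itself): the error code is assigned at each failure site right before the jump to the common unwind label, instead of relying on one stale default.

	static int example_probe(void)
	{
		void *a = NULL, *b = NULL;
		int ret;			/* deliberately no catch-all default */

		a = kzalloc(64, GFP_KERNEL);
		if (a == NULL) {
			ret = -ENOMEM;		/* set exactly where the failure happens */
			goto err;
		}
		b = kzalloc(64, GFP_KERNEL);
		if (b == NULL) {
			ret = -ENOMEM;
			goto err;
		}
		return 0;
	err:
		kfree(b);			/* kfree(NULL) is a no-op */
		kfree(a);
		return ret;
	}

With a single "ret = -ENOMEM" up front, any later failure site that forgets to overwrite ret silently reports the wrong code; assigning per site makes each exit path self-documenting.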
Signed-off-by: Axel Lin Acked-by: David Vrabel Signed-off-by: Chris Ball --- drivers/mmc/host/ushc.c | 30 ++++++++++++++++++++++-------- 1 file changed, 22 insertions(+), 8 deletions(-) diff --git a/drivers/mmc/host/ushc.c b/drivers/mmc/host/ushc.c index b4ead4a13c98..f8f65df9b017 100644 --- a/drivers/mmc/host/ushc.c +++ b/drivers/mmc/host/ushc.c @@ -425,7 +425,7 @@ static int ushc_probe(struct usb_interface *intf, const struct usb_device_id *id struct usb_device *usb_dev = interface_to_usbdev(intf); struct mmc_host *mmc; struct ushc_data *ushc; - int ret = -ENOMEM; + int ret; mmc = mmc_alloc_host(sizeof(struct ushc_data), &intf->dev); if (mmc == NULL) @@ -462,11 +462,15 @@ static int ushc_probe(struct usb_interface *intf, const struct usb_device_id *id mmc->max_blk_count = 511; ushc->int_urb = usb_alloc_urb(0, GFP_KERNEL); - if (ushc->int_urb == NULL) + if (ushc->int_urb == NULL) { + ret = -ENOMEM; goto err; + } ushc->int_data = kzalloc(sizeof(struct ushc_int_data), GFP_KERNEL); - if (ushc->int_data == NULL) + if (ushc->int_data == NULL) { + ret = -ENOMEM; goto err; + } usb_fill_int_urb(ushc->int_urb, ushc->usb_dev, usb_rcvintpipe(usb_dev, intf->cur_altsetting->endpoint[0].desc.bEndpointAddress), @@ -475,11 +479,15 @@ static int ushc_probe(struct usb_interface *intf, const struct usb_device_id *id intf->cur_altsetting->endpoint[0].desc.bInterval); ushc->cbw_urb = usb_alloc_urb(0, GFP_KERNEL); - if (ushc->cbw_urb == NULL) + if (ushc->cbw_urb == NULL) { + ret = -ENOMEM; goto err; + } ushc->cbw = kzalloc(sizeof(struct ushc_cbw), GFP_KERNEL); - if (ushc->cbw == NULL) + if (ushc->cbw == NULL) { + ret = -ENOMEM; goto err; + } ushc->cbw->signature = USHC_CBW_SIGNATURE; usb_fill_bulk_urb(ushc->cbw_urb, ushc->usb_dev, usb_sndbulkpipe(usb_dev, 2), @@ -487,15 +495,21 @@ static int ushc_probe(struct usb_interface *intf, const struct usb_device_id *id cbw_callback, ushc); ushc->data_urb = usb_alloc_urb(0, GFP_KERNEL); - if (ushc->data_urb == NULL) + if (ushc->data_urb == NULL) { + ret = -ENOMEM; goto err; + } ushc->csw_urb = usb_alloc_urb(0, GFP_KERNEL); - if (ushc->csw_urb == NULL) + if (ushc->csw_urb == NULL) { + ret = -ENOMEM; goto err; + } ushc->csw = kzalloc(sizeof(struct ushc_cbw), GFP_KERNEL); - if (ushc->csw == NULL) + if (ushc->csw == NULL) { + ret = -ENOMEM; goto err; + } usb_fill_bulk_urb(ushc->csw_urb, ushc->usb_dev, usb_rcvbulkpipe(usb_dev, 6), ushc->csw, sizeof(struct ushc_csw), csw_callback, ushc); -- cgit v1.2.3 From 5f619704d18b93869d045abc49e09cdba109b04b Mon Sep 17 00:00:00 2001 From: Daniel Drake Date: Thu, 4 Nov 2010 22:20:39 +0000 Subject: mmc: sdhci: Properly enable SDIO IRQ wakeups A little more work was needed for SDIO IRQ wakeups to be functional. Wake-on-WLAN on the SD WiFi adapter in the XO-1.5 laptop is now working. 
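For context, a sketch of the consumer side: how an SDIO function driver would request this from its suspend handler. The core API names below (sdio_get_host_pm_caps, sdio_set_host_pm_flags) are the MMC core's of this era; the driver itself is hypothetical.

	static int example_wlan_suspend(struct device *dev)
	{
		struct sdio_func *func = dev_to_sdio_func(dev);
		mmc_pm_flag_t caps = sdio_get_host_pm_caps(func);

		/* keep the card powered across suspend; let its IRQ wake the host */
		if (!(caps & MMC_PM_KEEP_POWER) || !(caps & MMC_PM_WAKE_SDIO_IRQ))
			return -ENOSYS;

		return sdio_set_host_pm_flags(func,
				MMC_PM_KEEP_POWER | MMC_PM_WAKE_SDIO_IRQ);
	}

The patch below is what turns the MMC_PM_WAKE_SDIO_IRQ half of that request into hardware state: SDHCI_WAKE_ON_INT in the wake-up control register plus PME arming on the PCI side.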
Signed-off-by: Daniel Drake Signed-off-by: Chris Ball --- drivers/mmc/host/sdhci-pci.c | 11 +++++++++-- drivers/mmc/host/sdhci.c | 10 ++++++++++ drivers/mmc/host/sdhci.h | 4 ++++ 3 files changed, 23 insertions(+), 2 deletions(-) diff --git a/drivers/mmc/host/sdhci-pci.c b/drivers/mmc/host/sdhci-pci.c index 55746bac2f44..d196e77a93dc 100644 --- a/drivers/mmc/host/sdhci-pci.c +++ b/drivers/mmc/host/sdhci-pci.c @@ -637,6 +637,7 @@ static int sdhci_pci_suspend (struct pci_dev *pdev, pm_message_t state) { struct sdhci_pci_chip *chip; struct sdhci_pci_slot *slot; + mmc_pm_flag_t slot_pm_flags; mmc_pm_flag_t pm_flags = 0; int i, ret; @@ -657,7 +658,11 @@ static int sdhci_pci_suspend (struct pci_dev *pdev, pm_message_t state) return ret; } - pm_flags |= slot->host->mmc->pm_flags; + slot_pm_flags = slot->host->mmc->pm_flags; + if (slot_pm_flags & MMC_PM_WAKE_SDIO_IRQ) + sdhci_enable_irq_wakeups(slot->host); + + pm_flags |= slot_pm_flags; } if (chip->fixes && chip->fixes->suspend) { @@ -671,8 +676,10 @@ static int sdhci_pci_suspend (struct pci_dev *pdev, pm_message_t state) pci_save_state(pdev); if (pm_flags & MMC_PM_KEEP_POWER) { - if (pm_flags & MMC_PM_WAKE_SDIO_IRQ) + if (pm_flags & MMC_PM_WAKE_SDIO_IRQ) { + pci_pme_active(pdev, true); pci_enable_wake(pdev, PCI_D3hot, 1); + } pci_set_power_state(pdev, PCI_D3hot); } else { pci_enable_wake(pdev, pci_choose_state(pdev, state), 0); diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index 782c0ee3c925..154cbf83c1ab 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c @@ -1681,6 +1681,16 @@ int sdhci_resume_host(struct sdhci_host *host) EXPORT_SYMBOL_GPL(sdhci_resume_host); +void sdhci_enable_irq_wakeups(struct sdhci_host *host) +{ + u8 val; + val = sdhci_readb(host, SDHCI_WAKE_UP_CONTROL); + val |= SDHCI_WAKE_ON_INT; + sdhci_writeb(host, val, SDHCI_WAKE_UP_CONTROL); +} + +EXPORT_SYMBOL_GPL(sdhci_enable_irq_wakeups); + #endif /* CONFIG_PM */ /*****************************************************************************\ diff --git a/drivers/mmc/host/sdhci.h b/drivers/mmc/host/sdhci.h index b7b8a3b28b01..d52a7163b97a 100644 --- a/drivers/mmc/host/sdhci.h +++ b/drivers/mmc/host/sdhci.h @@ -87,6 +87,9 @@ #define SDHCI_BLOCK_GAP_CONTROL 0x2A #define SDHCI_WAKE_UP_CONTROL 0x2B +#define SDHCI_WAKE_ON_INT 0x01 +#define SDHCI_WAKE_ON_INSERT 0x02 +#define SDHCI_WAKE_ON_REMOVE 0x04 #define SDHCI_CLOCK_CONTROL 0x2C #define SDHCI_DIVIDER_SHIFT 8 @@ -317,6 +320,7 @@ extern void sdhci_remove_host(struct sdhci_host *host, int dead); #ifdef CONFIG_PM extern int sdhci_suspend_host(struct sdhci_host *host, pm_message_t state); extern int sdhci_resume_host(struct sdhci_host *host); +extern void sdhci_enable_irq_wakeups(struct sdhci_host *host); #endif #endif /* __SDHCI_HW_H */ -- cgit v1.2.3 From 37865fe91582582a6f6c00652f6a2b1ff71f8a78 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eric=20B=C3=A9nard?= Date: Sat, 23 Oct 2010 01:57:21 +0200 Subject: mmc: sdhci-esdhc-imx: fix timeout on i.MX's sdhci MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch fixes timeout problems on i.MX's sdhci as suggested by Richard Zhu. 
Tested on: - i.MX257: not needed - i.MX357: needed - i.MX515: needed More details can be found here: http://lists.infradead.org/pipermail/linux-arm-kernel/2010-October/029748.html Signed-off-by: Eric Bénard Tested-by: Shawn Guo Acked-by: Wolfram Sang Signed-off-by: Chris Ball --- drivers/mmc/host/sdhci-esdhc-imx.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/mmc/host/sdhci-esdhc-imx.c b/drivers/mmc/host/sdhci-esdhc-imx.c index 2e9cca19c90b..28e63ef58d0f 100644 --- a/drivers/mmc/host/sdhci-esdhc-imx.c +++ b/drivers/mmc/host/sdhci-esdhc-imx.c @@ -17,6 +17,7 @@ #include #include #include +#include #include "sdhci.h" #include "sdhci-pltfm.h" #include "sdhci-esdhc.h" @@ -112,6 +113,9 @@ static int esdhc_pltfm_init(struct sdhci_host *host, struct sdhci_pltfm_data *pd clk_enable(clk); pltfm_host->clk = clk; + if (cpu_is_mx35() || cpu_is_mx51()) + host->quirks |= SDHCI_QUIRK_BROKEN_TIMEOUT_VAL; + return 0; } -- cgit v1.2.3 From 16a790bcce87740d219b7227eaa4df72804097ea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eric=20B=C3=A9nard?= Date: Sat, 23 Oct 2010 01:57:22 +0200 Subject: mmc: sdhci-esdhc-imx: enable QUIRK_NO_MULTIBLOCK only for i.MX25 and i.MX35 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Only these CPUs list the bug in their errata. Signed-off-by: Eric Bénard Acked-by: Wolfram Sang Signed-off-by: Chris Ball --- drivers/mmc/host/sdhci-esdhc-imx.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/mmc/host/sdhci-esdhc-imx.c b/drivers/mmc/host/sdhci-esdhc-imx.c index 28e63ef58d0f..9b82910b9dbb 100644 --- a/drivers/mmc/host/sdhci-esdhc-imx.c +++ b/drivers/mmc/host/sdhci-esdhc-imx.c @@ -116,6 +116,10 @@ static int esdhc_pltfm_init(struct sdhci_host *host, struct sdhci_pltfm_data *pd if (cpu_is_mx35() || cpu_is_mx51()) host->quirks |= SDHCI_QUIRK_BROKEN_TIMEOUT_VAL; + /* Fix errata ENGcm07207 which is present on i.MX25 and i.MX35 */ + if (cpu_is_mx25() || cpu_is_mx35()) + host->quirks |= SDHCI_QUIRK_NO_MULTIBLOCK; + return 0; } @@ -137,10 +141,8 @@ static struct sdhci_ops sdhci_esdhc_ops = { }; struct sdhci_pltfm_data sdhci_esdhc_imx_pdata = { - .quirks = ESDHC_DEFAULT_QUIRKS | SDHCI_QUIRK_NO_MULTIBLOCK - | SDHCI_QUIRK_BROKEN_ADMA, + .quirks = ESDHC_DEFAULT_QUIRKS | SDHCI_QUIRK_BROKEN_ADMA, /* ADMA has issues. Might be fixable */ - /* NO_MULTIBLOCK might be MX35 only (Errata: ENGcm07207) */ .ops = &sdhci_esdhc_ops, .init = esdhc_pltfm_init, .exit = esdhc_pltfm_exit, -- cgit v1.2.3 From 35ac6f081f26e1b6b3482b9c8dfccebe7817c691 Mon Sep 17 00:00:00 2001 From: Jacob Pan Date: Tue, 9 Nov 2010 13:57:29 +0000 Subject: mmc: sdhci: Fix crash on boot with C0 stepping Moorestown platforms SDHC2 is newly added in C0 stepping of Langwell. Without the Moorestown-specific quirk, the default pci_probe will be called and crash the kernel. This patch fixes the crash on C0 by using the same probing function as HC1, which limits the number of slots to one. Signed-off-by: Jacob Pan Signed-off-by: Alan Cox Signed-off-by: Chris Ball --- drivers/mmc/host/sdhci-pci.c | 20 ++++++++++++++------ include/linux/pci_ids.h | 1 + 2 files changed, 15 insertions(+), 6 deletions(-) diff --git a/drivers/mmc/host/sdhci-pci.c b/drivers/mmc/host/sdhci-pci.c index d196e77a93dc..3d9c2460d437 100644 --- a/drivers/mmc/host/sdhci-pci.c +++ b/drivers/mmc/host/sdhci-pci.c @@ -149,11 +149,11 @@ static const struct sdhci_pci_fixes sdhci_cafe = { * ADMA operation is disabled for Moorestown platform due to * hardware bugs.
*/ -static int mrst_hc1_probe(struct sdhci_pci_chip *chip) +static int mrst_hc_probe(struct sdhci_pci_chip *chip) { /* - * slots number is fixed here for MRST as SDIO3 is never used and has - * hardware bugs. + * slots number is fixed here for MRST as SDIO3/5 are never used and + * have hardware bugs. */ chip->num_slots = 1; return 0; @@ -163,9 +163,9 @@ static const struct sdhci_pci_fixes sdhci_intel_mrst_hc0 = { .quirks = SDHCI_QUIRK_BROKEN_ADMA | SDHCI_QUIRK_NO_HISPD_BIT, }; -static const struct sdhci_pci_fixes sdhci_intel_mrst_hc1 = { +static const struct sdhci_pci_fixes sdhci_intel_mrst_hc1_hc2 = { .quirks = SDHCI_QUIRK_BROKEN_ADMA | SDHCI_QUIRK_NO_HISPD_BIT, - .probe = mrst_hc1_probe, + .probe = mrst_hc_probe, }; static const struct sdhci_pci_fixes sdhci_intel_mfd_sd = { @@ -538,7 +538,15 @@ static const struct pci_device_id pci_ids[] __devinitdata = { .device = PCI_DEVICE_ID_INTEL_MRST_SD1, .subvendor = PCI_ANY_ID, .subdevice = PCI_ANY_ID, - .driver_data = (kernel_ulong_t)&sdhci_intel_mrst_hc1, + .driver_data = (kernel_ulong_t)&sdhci_intel_mrst_hc1_hc2, + }, + + { + .vendor = PCI_VENDOR_ID_INTEL, + .device = PCI_DEVICE_ID_INTEL_MRST_SD2, + .subvendor = PCI_ANY_ID, + .subdevice = PCI_ANY_ID, + .driver_data = (kernel_ulong_t)&sdhci_intel_mrst_hc1_hc2, }, { diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index c6bcfe93b9ca..d369b533dc2a 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2441,6 +2441,7 @@ #define PCI_DEVICE_ID_INTEL_MFD_SDIO2 0x0822 #define PCI_DEVICE_ID_INTEL_MFD_EMMC0 0x0823 #define PCI_DEVICE_ID_INTEL_MFD_EMMC1 0x0824 +#define PCI_DEVICE_ID_INTEL_MRST_SD2 0x084F #define PCI_DEVICE_ID_INTEL_I960 0x0960 #define PCI_DEVICE_ID_INTEL_I960RM 0x0962 #define PCI_DEVICE_ID_INTEL_8257X_SOL 0x1062 -- cgit v1.2.3 From c0deae8c9587419ab13874b74425ce2eb2e18508 Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Wed, 3 Nov 2010 18:52:56 +0200 Subject: posix-cpu-timers: Rcu_read_lock/unlock protect find_task_by_vpid call Commit 4221a9918e38b7494cee341dda7b7b4bb8c04bde "Add RCU check for find_task_by_vpid()" introduced rcu_lockdep_assert to find_task_by_pid_ns. Add rcu_read_lock/rcu_read_unlock around the find_task_by_vpid() calls. Tetsuo Handa wrote: | Quoting from one of the posts in that thread | http://kerneltrap.org/mailarchive/linux-kernel/2010/2/8/4536388 | || Usually tasklist gives enough protection, but if copy_process() fails || it calls free_pid() lockless and does call_rcu(delayed_put_pid(). || This means, without rcu lock find_pid_ns() can't scan the hash table || safely. Thomas Gleixner wrote: | We can remove the tasklist_lock while at it. rcu_read_lock is enough. Patch also replaces thread_group_leader with has_group_leader_pid in accordance with a comment by Oleg Nesterov: | ... thread_group_leader() check is not reliable without | tasklist. If we race with de_thread() find_task_by_vpid() can find | the new leader before it updates its ->group_leader. | | perhaps it makes sense to change posix_cpu_timer_create() to use | has_group_leader_pid() instead, just to make this code not look racy | and avoid adding new problems.
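A condensed sketch of the lookup pattern the patch converges on (illustrative; the real hunks follow below):

	rcu_read_lock();
	p = find_task_by_vpid(pid);
	if (p && !has_group_leader_pid(p))
		p = NULL;		/* may be racing with de_thread(): reject */
	if (p)
		get_task_struct(p);	/* only needed if p outlives the critical section */
	rcu_read_unlock();

The RCU read-side critical section is what keeps the pid hash entry and the task_struct from being freed under us, which is all these lookups require; tasklist_lock adds nothing here.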
Signed-off-by: Sergey Senozhatsky Cc: Peter Zijlstra Cc: Stanislaw Gruszka Reviewed-by: Oleg Nesterov LKML-Reference: <20101103165256.GD30053@swordfish.minsk.epam.com> Signed-off-by: Thomas Gleixner --- kernel/posix-cpu-timers.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c index 6842eeba5879..05bb7173850e 100644 --- a/kernel/posix-cpu-timers.c +++ b/kernel/posix-cpu-timers.c @@ -37,13 +37,13 @@ static int check_clock(const clockid_t which_clock) if (pid == 0) return 0; - read_lock(&tasklist_lock); + rcu_read_lock(); p = find_task_by_vpid(pid); if (!p || !(CPUCLOCK_PERTHREAD(which_clock) ? - same_thread_group(p, current) : thread_group_leader(p))) { + same_thread_group(p, current) : has_group_leader_pid(p))) { error = -EINVAL; } - read_unlock(&tasklist_lock); + rcu_read_unlock(); return error; } @@ -390,7 +390,7 @@ int posix_cpu_timer_create(struct k_itimer *new_timer) INIT_LIST_HEAD(&new_timer->it.cpu.entry); - read_lock(&tasklist_lock); + rcu_read_lock(); if (CPUCLOCK_PERTHREAD(new_timer->it_clock)) { if (pid == 0) { p = current; @@ -404,7 +404,7 @@ int posix_cpu_timer_create(struct k_itimer *new_timer) p = current->group_leader; } else { p = find_task_by_vpid(pid); - if (p && !thread_group_leader(p)) + if (p && !has_group_leader_pid(p)) p = NULL; } } @@ -414,7 +414,7 @@ int posix_cpu_timer_create(struct k_itimer *new_timer) } else { ret = -EINVAL; } - read_unlock(&tasklist_lock); + rcu_read_unlock(); return ret; } -- cgit v1.2.3 From 13b9b6e746d753d43270a78dd39694912646b5d9 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 10 Nov 2010 22:19:24 -0500 Subject: tracing: Fix module use of trace_bprintk() On use of trace_printk() there's a macro that determines if the format is static or a variable. If it is static, it defaults to __trace_bprintk() otherwise it uses __trace_printk(). A while ago, Lai Jiangshan added __trace_bprintk(). In that patch, we discussed a way to allow modules to use it. The difference between __trace_bprintk() and __trace_printk() is that for faster processing, just the format and args are stored in the trace instead of running it through a sprintf function. In order to do this, the format used by __trace_bprintk() had to be persistent. See commit 1ba28e02a18cbdbea123836f6c98efb09cbf59ec. The problem comes with trace_bprintk() when the module is unloaded. The pointer left in the buffer is still pointing to the format. To solve this issue, the formats in the module were copied into the kernel core. If the same format was used, they would use the same copy (to prevent a memory leak). This all worked well until we tried to merge everything. At the time this was written, Lai Jiangshan, Frederic Weisbecker, Ingo Molnar and I were all touching the same code. When this was merged, we lost the part of it that was in module.c. This dropped the copying of the formats, so unloading a module could leave bad pointers in the ring buffer. This patch adds back (with updates required for current kernel) the module code that sets up the necessary pointers.
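To see why the copy is necessary, consider what a bprintk-style record stores; a simplified sketch, not the exact layout:

	struct bprint_entry_sketch {
		const char	*fmt;	/* pointer into .rodata -- no string copy */
		unsigned long	ip;
		u32		buf[];	/* raw argument words, decoded at read time */
	};

The format string is only dereferenced when the trace is read, possibly long after the event fired. If fmt points into a module's .rodata and the module has since been unloaded, the reader chases a dangling pointer, which is why the loader must copy module formats into core kernel memory (deduplicated by content) at load time.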
Cc: Lai Jiangshan Cc: Rusty Russell Signed-off-by: Steven Rostedt --- kernel/module.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/kernel/module.c b/kernel/module.c index 437a74a7524a..d190664f25ff 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -2326,6 +2326,18 @@ static void find_module_sections(struct module *mod, struct load_info *info) kmemleak_scan_area(mod->trace_events, sizeof(*mod->trace_events) * mod->num_trace_events, GFP_KERNEL); #endif +#ifdef CONFIG_TRACING + mod->trace_bprintk_fmt_start = section_objs(info, "__trace_printk_fmt", + sizeof(*mod->trace_bprintk_fmt_start), + &mod->num_trace_bprintk_fmt); + /* + * This section contains pointers to allocated objects in the trace + * code and not scanning it leads to false positives. + */ + kmemleak_scan_area(mod->trace_bprintk_fmt_start, + sizeof(*mod->trace_bprintk_fmt_start) * + mod->num_trace_bprintk_fmt, GFP_KERNEL); +#endif #ifdef CONFIG_FTRACE_MCOUNT_RECORD /* sechdrs[0].sh_size is always zero */ mod->ftrace_callsites = section_objs(info, "__mcount_loc", -- cgit v1.2.3 From b5908548537ccd3ada258ca5348df7ffc93e5a06 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 10 Nov 2010 22:29:49 -0500 Subject: tracing: Force arch_local_irq_* notrace for paravirt When running ktest.pl randconfig tests, I would sometimes trigger a lockdep annotation bug (possible reason: unannotated irqs-on). This triggering happened right after function tracer self test was executed. After doing a config bisect I found that this was caused by having function tracer, paravirt guest, prove locking, and rcu torture all enabled. The rcu torture just enhanced the likelihood of triggering the bug. Prove locking was needed, since it was the thing reporting the bug. Function tracer would trace and disable interrupts in all sorts of funny places. paravirt guest would turn arch_local_irq_* into functions that would be traced. Besides the fact that tracing arch_local_irq_* is just a bad idea, this is what is happening. The bug happened simply in the local_irq_restore() code:

	if (raw_irqs_disabled_flags(flags)) {	\
		raw_local_irq_restore(flags);	\
		trace_hardirqs_off();		\
	} else {				\
		trace_hardirqs_on();		\
		raw_local_irq_restore(flags);	\
	}					\

The raw_local_irq_restore() was defined as arch_local_irq_restore(). Now imagine we are about to enable interrupts. We go into the else case and call trace_hardirqs_on() which tells lockdep that we are enabling interrupts, so it sets the current->hardirqs_enabled = 1. Then we call raw_local_irq_restore() which calls arch_local_irq_restore() which gets traced! Now in the function tracer we disable interrupts with local_irq_save(). This is fine, but the saved flags record that interrupts are disabled. When the function tracer calls local_irq_restore() it does so, but this time with flags indicating disabled, so we go into the if () path. This keeps interrupts disabled and calls trace_hardirqs_off() which sets current->hardirqs_enabled = 0. When the tracer is finished and proceeds with the original code, we enable interrupts but leave current->hardirqs_enabled as 0, which breaks lockdep's internal processing.
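The same sequence, condensed into annotated pseudo-code (an illustration of the description above, not literal kernel code):

	local_irq_restore(flags)		/* flags say: enabled */
	  trace_hardirqs_on()			/* lockdep: hardirqs_enabled = 1 */
	  raw_local_irq_restore(flags)		/* paravirt: a traceable C function */
	    ftrace handler:
	      local_irq_save(tflags)		/* tflags say: disabled */
	      ...record the call...
	      local_irq_restore(tflags)		/* takes the disabled branch */
	        trace_hardirqs_off()		/* lockdep: hardirqs_enabled = 0 */
	/* interrupts are really on again, but lockdep believes they are off */

Marking the paravirt arch_local_irq_* helpers notrace keeps ftrace out of this window entirely, which is the whole fix.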
Cc: Thomas Gleixner Signed-off-by: Steven Rostedt --- arch/x86/include/asm/paravirt.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index 18e3b8a8709f..ef9975812c77 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -824,27 +824,27 @@ static __always_inline void arch_spin_unlock(struct arch_spinlock *lock) #define __PV_IS_CALLEE_SAVE(func) \ ((struct paravirt_callee_save) { func }) -static inline unsigned long arch_local_save_flags(void) +static inline notrace unsigned long arch_local_save_flags(void) { return PVOP_CALLEE0(unsigned long, pv_irq_ops.save_fl); } -static inline void arch_local_irq_restore(unsigned long f) +static inline notrace void arch_local_irq_restore(unsigned long f) { PVOP_VCALLEE1(pv_irq_ops.restore_fl, f); } -static inline void arch_local_irq_disable(void) +static inline notrace void arch_local_irq_disable(void) { PVOP_VCALLEE0(pv_irq_ops.irq_disable); } -static inline void arch_local_irq_enable(void) +static inline notrace void arch_local_irq_enable(void) { PVOP_VCALLEE0(pv_irq_ops.irq_enable); } -static inline unsigned long arch_local_irq_save(void) +static inline notrace unsigned long arch_local_irq_save(void) { unsigned long f; -- cgit v1.2.3 From d9bcbf343ec63e1104b5276195888ee06b4d086f Mon Sep 17 00:00:00 2001 From: Guennadi Liakhovetski Date: Thu, 11 Nov 2010 17:32:25 +0100 Subject: mmc: fix rmmod race for hosts using card-detection polling MMC hosts that poll for card detection by defining the MMC_CAP_NEEDS_POLL flag have a race on rmmod, where the delayed work is cancelled without waiting for completed polling. To prevent this a _sync version of the work cancellation has to be used. Signed-off-by: Guennadi Liakhovetski Cc: Signed-off-by: Chris Ball --- drivers/mmc/core/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c index 8f86d702e46e..31ae07a36576 100644 --- a/drivers/mmc/core/core.c +++ b/drivers/mmc/core/core.c @@ -1559,7 +1559,7 @@ void mmc_stop_host(struct mmc_host *host) if (host->caps & MMC_CAP_DISABLE) cancel_delayed_work(&host->disable); - cancel_delayed_work(&host->detect); + cancel_delayed_work_sync(&host->detect); mmc_flush_scheduled_work(); /* clear pm flags now and let card drivers set them as needed */ -- cgit v1.2.3 From 6c0aca288e726405b01dacb12cac556454d34b2a Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 11 Nov 2010 21:18:43 +0100 Subject: x86: Ignore trap bits on single step exceptions When a single step exception fires, the trap bits, used to signal hardware breakpoints, are in a random state. These trap bits might be set if another exception will follow, like a breakpoint in the next instruction, or a watchpoint in the previous one. Or there can be any junk there. So if we handle these trap bits during the single step exception, we are going to handle an exception twice, or we are going to handle junk. Just ignore them in this case. This fixes https://bugzilla.kernel.org/show_bug.cgi?id=21332 Reported-by: Michael Stefaniuc Signed-off-by: Frederic Weisbecker Cc: Rafael J. 
Wysocki Cc: Maciej Rutecki Cc: Alexandre Julliard Cc: Jason Wessel Cc: All since 2.6.33.x --- arch/x86/kernel/hw_breakpoint.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c index ff15c9dcc25d..42c594254507 100644 --- a/arch/x86/kernel/hw_breakpoint.c +++ b/arch/x86/kernel/hw_breakpoint.c @@ -433,6 +433,10 @@ static int __kprobes hw_breakpoint_handler(struct die_args *args) dr6_p = (unsigned long *)ERR_PTR(args->err); dr6 = *dr6_p; + /* If it's a single step, TRAP bits are random */ + if (dr6 & DR_STEP) + return NOTIFY_DONE; + /* Do an early return if no trap bits are set in DR6 */ if ((dr6 & DR_TRAP_BITS) == 0) return NOTIFY_DONE; -- cgit v1.2.3 From 3c502e7a0255d82621ff25d60cc816624830497e Mon Sep 17 00:00:00 2001 From: Jason Wessel Date: Thu, 4 Nov 2010 17:33:01 -0500 Subject: perf,hw_breakpoint: Initialize hardware api earlier When using early debugging, the kernel does not initialize the hw_breakpoint API early enough and causes the late initialization of the kernel debugger to fail. The boot arguments are: earlyprintk=vga ekgdboc=kbd kgdbwait Then simply type "go" at the kdb prompt and boot. The kernel will later emit the message: kgdb: Could not allocate hwbreakpoints And at that point the kernel debugger will cease to work correctly. The solution is to initialize the hw_breakpoint at the same time that all the other perf call backs are initialized instead of using a core_initcall() initialization which happens well after the kernel debugger can make use of hardware breakpoints. Signed-off-by: Jason Wessel CC: Frederic Weisbecker CC: Ingo Molnar CC: Peter Zijlstra LKML-Reference: <4CD3396D.1090308@windriver.com> Signed-off-by: Frederic Weisbecker --- include/linux/hw_breakpoint.h | 4 ++++ kernel/hw_breakpoint.c | 3 +-- kernel/perf_event.c | 6 ++++++ 3 files changed, 11 insertions(+), 2 deletions(-) diff --git a/include/linux/hw_breakpoint.h b/include/linux/hw_breakpoint.h index a2d6ea49ec56..d1e55fed2c7d 100644 --- a/include/linux/hw_breakpoint.h +++ b/include/linux/hw_breakpoint.h @@ -33,6 +33,8 @@ enum bp_type_idx { #ifdef CONFIG_HAVE_HW_BREAKPOINT +extern int __init init_hw_breakpoint(void); + static inline void hw_breakpoint_init(struct perf_event_attr *attr) { memset(attr, 0, sizeof(*attr)); @@ -108,6 +110,8 @@ static inline struct arch_hw_breakpoint *counter_arch_bp(struct perf_event *bp) #else /* !CONFIG_HAVE_HW_BREAKPOINT */ +static inline int __init init_hw_breakpoint(void) { return 0; } + static inline struct perf_event * register_user_hw_breakpoint(struct perf_event_attr *attr, perf_overflow_handler_t triggered, diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c index 2c9120f0afca..e5325825aeb6 100644 --- a/kernel/hw_breakpoint.c +++ b/kernel/hw_breakpoint.c @@ -620,7 +620,7 @@ static struct pmu perf_breakpoint = { .read = hw_breakpoint_pmu_read, }; -static int __init init_hw_breakpoint(void) +int __init init_hw_breakpoint(void) { unsigned int **task_bp_pinned; int cpu, err_cpu; @@ -655,6 +655,5 @@ static int __init init_hw_breakpoint(void) return -ENOMEM; } -core_initcall(init_hw_breakpoint); diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 517d827f4982..05b7d8c72c6c 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -31,6 +31,7 @@ #include #include #include +#include #include @@ -6295,6 +6296,8 @@ perf_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) void __init perf_event_init(void) { + int ret; + perf_event_init_all_cpus(); 
init_srcu_struct(&pmus_srcu); perf_pmu_register(&perf_swevent); @@ -6302,4 +6305,7 @@ void __init perf_event_init(void) perf_pmu_register(&perf_task_clock); perf_tp_register(); perf_cpu_notifier(perf_cpu_notify); + + ret = init_hw_breakpoint(); + WARN(ret, "hw_breakpoint initialization failed with: %d", ret); } -- cgit v1.2.3 From 91e86e560d0b3ce4c5fc64fd2bbb99f856a30a4e Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 10 Nov 2010 12:56:12 +0100 Subject: tracing: Fix recursive user stack trace The user stack trace can fault when examining the trace, which would call the do_page_fault handler, which would trace again, which would do the user stack trace, which would fault and call do_page_fault again ... thus causing a recursive bug. We need to have a recursion detector here. [ Resubmitted by Jiri Olsa ] [ Eric Dumazet recommended using __this_cpu_* instead of __get_cpu_* ] Cc: Eric Dumazet Signed-off-by: Jiri Olsa LKML-Reference: <1289390172-9730-3-git-send-email-jolsa@redhat.com> Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 82d9b8106cd0..ee6a7339cf0e 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1284,6 +1284,8 @@ void trace_dump_stack(void) __ftrace_trace_stack(global_trace.buffer, flags, 3, preempt_count()); } +static DEFINE_PER_CPU(int, user_stack_count); + void ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc) { @@ -1302,6 +1304,18 @@ ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc) if (unlikely(in_nmi())) return; + /* + * prevent recursion, since the user stack tracing may + * trigger other kernel events. + */ + preempt_disable(); + if (__this_cpu_read(user_stack_count)) + goto out; + + __this_cpu_inc(user_stack_count); + + + event = trace_buffer_lock_reserve(buffer, TRACE_USER_STACK, sizeof(*entry), flags, pc); if (!event) @@ -1319,6 +1333,11 @@ ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc) save_stack_trace_user(&trace); if (!filter_check_discard(call, entry, buffer, event)) ring_buffer_unlock_commit(buffer, event); + + __this_cpu_dec(user_stack_count); + + out: + preempt_enable(); } #ifdef UNUSED -- cgit v1.2.3 From d69b78ba1deaaa95ffa8dac5a9ca819ce454d52e Mon Sep 17 00:00:00 2001 From: Greg Thelen Date: Mon, 15 Nov 2010 10:20:52 +0100 Subject: ioprio: grab rcu_read_lock in sys_ioprio_{set,get}() Using: - CONFIG_LOCKUP_DETECTOR=y - CONFIG_PREEMPT=y - CONFIG_LOCKDEP=y - CONFIG_PROVE_LOCKING=y - CONFIG_PROVE_RCU=y found a missing rcu lock during boot on a 512 MiB x86_64 ubuntu vm: =================================================== [ INFO: suspicious rcu_dereference_check() usage. ] --------------------------------------------------- kernel/pid.c:419 invoked rcu_dereference_check() without protection! other info that might help us debug this: rcu_scheduler_active = 1, debug_locks = 0 1 lock held by ureadahead/1355: #0: (tasklist_lock){.+.+..}, at: [] sys_ioprio_set+0x7f/0x29e stack backtrace: Pid: 1355, comm: ureadahead Not tainted 2.6.37-dbg-DEV #1 Call Trace: [] lockdep_rcu_dereference+0xaa/0xb3 [] find_task_by_pid_ns+0x44/0x5d [] find_task_by_vpid+0x22/0x24 [] sys_ioprio_set+0xb4/0x29e [] ? trace_hardirqs_off_thunk+0x3a/0x3c [] sysenter_dispatch+0x7/0x2c [] ? trace_hardirqs_on_thunk+0x3a/0x3f The fix is to: a) grab rcu lock in sys_ioprio_{set,get}() and b) avoid grabbing tasklist_lock.
Discussion in: http://marc.info/?l=linux-kernel&m=128951324702889 Signed-off-by: Greg Thelen Acked-by: Paul E. McKenney Reviewed-by: Oleg Nesterov Modified by Jens to remove the now redundant inner rcu lock and unlock since they are now protected by the outer lock. Signed-off-by: Jens Axboe --- fs/ioprio.c | 31 ++++++------------------------- 1 file changed, 6 insertions(+), 25 deletions(-) diff --git a/fs/ioprio.c b/fs/ioprio.c index 2f7d05c89922..7da2a06508e5 100644 --- a/fs/ioprio.c +++ b/fs/ioprio.c @@ -103,22 +103,15 @@ SYSCALL_DEFINE3(ioprio_set, int, which, int, who, int, ioprio) } ret = -ESRCH; - /* - * We want IOPRIO_WHO_PGRP/IOPRIO_WHO_USER to be "atomic", - * so we can't use rcu_read_lock(). See re-copy of ->ioprio - * in copy_process(). - */ - read_lock(&tasklist_lock); + rcu_read_lock(); switch (which) { case IOPRIO_WHO_PROCESS: - rcu_read_lock(); if (!who) p = current; else p = find_task_by_vpid(who); if (p) ret = set_task_ioprio(p, ioprio); - rcu_read_unlock(); break; case IOPRIO_WHO_PGRP: if (!who) @@ -141,12 +134,7 @@ SYSCALL_DEFINE3(ioprio_set, int, which, int, who, int, ioprio) break; do_each_thread(g, p) { - int match; - - rcu_read_lock(); - match = __task_cred(p)->uid == who; - rcu_read_unlock(); - if (!match) + if (__task_cred(p)->uid != who) continue; ret = set_task_ioprio(p, ioprio); if (ret) @@ -160,7 +148,7 @@ free_uid: ret = -EINVAL; } - read_unlock(&tasklist_lock); + rcu_read_unlock(); return ret; } @@ -204,17 +192,15 @@ SYSCALL_DEFINE2(ioprio_get, int, which, int, who) int ret = -ESRCH; int tmpio; - read_lock(&tasklist_lock); + rcu_read_lock(); switch (which) { case IOPRIO_WHO_PROCESS: - rcu_read_lock(); if (!who) p = current; else p = find_task_by_vpid(who); if (p) ret = get_task_ioprio(p); - rcu_read_unlock(); break; case IOPRIO_WHO_PGRP: if (!who) @@ -241,12 +227,7 @@ SYSCALL_DEFINE2(ioprio_get, int, which, int, who) break; do_each_thread(g, p) { - int match; - - rcu_read_lock(); - match = __task_cred(p)->uid == user->uid; - rcu_read_unlock(); - if (!match) + if (__task_cred(p)->uid != user->uid) continue; tmpio = get_task_ioprio(p); if (tmpio < 0) @@ -264,6 +245,6 @@ SYSCALL_DEFINE2(ioprio_get, int, which, int, who) ret = -EINVAL; } - read_unlock(&tasklist_lock); + rcu_read_unlock(); return ret; } -- cgit v1.2.3 From 24f3f6b5eff92608a62449e33bfac0eed1447d02 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Mon, 15 Nov 2010 09:18:49 -0500 Subject: arch/tile: fix rwlock so would-be write lockers don't block new readers This avoids a deadlock in the IGMP code where one core gets a read lock, another core starts trying to get a write lock (thus blocking new readers), and then the first core tries to recursively re-acquire the read lock. We still try to preserve some degree of balance by giving priority to additional write lockers that come along while the lock is held for write, so they can all complete quickly and return the lock to the readers. Signed-off-by: Chris Metcalf --- arch/tile/lib/spinlock_32.c | 29 ++++++++++++++++++----------- 1 file changed, 18 insertions(+), 11 deletions(-) diff --git a/arch/tile/lib/spinlock_32.c b/arch/tile/lib/spinlock_32.c index 485e24d62c6b..5cd1c4004eca 100644 --- a/arch/tile/lib/spinlock_32.c +++ b/arch/tile/lib/spinlock_32.c @@ -167,23 +167,30 @@ void arch_write_lock_slow(arch_rwlock_t *rwlock, u32 val) * when we compare them. */ u32 my_ticket_; + u32 iterations = 0; - /* Take out the next ticket; this will also stop would-be readers. 
*/ if (val & 1) val = get_rwlock(rwlock); rwlock->lock = __insn_addb(val, 1 << WR_NEXT_SHIFT); + /* + * Wait until there are no readers, then bump up the next + * field and capture the ticket value. + */ + for (;;) { + if (!(val & 1)) { + if ((val >> RD_COUNT_SHIFT) == 0) + break; + rwlock->lock = val; + } + delay_backoff(iterations++); + val = __insn_tns((int *)&rwlock->lock); + } - /* Extract my ticket value from the original word. */ + /* Take out the next ticket and extract my ticket value. */ + rwlock->lock = __insn_addb(val, 1 << WR_NEXT_SHIFT); my_ticket_ = val >> WR_NEXT_SHIFT; - /* - * Wait until the "current" field matches our ticket, and - * there are no remaining readers. - */ + /* Wait until the "current" field matches our ticket. */ for (;;) { u32 curr_ = val >> WR_CURR_SHIFT; - u32 readers = val >> RD_COUNT_SHIFT; - u32 delta = ((my_ticket_ - curr_) & WR_MASK) + !!readers; + u32 delta = ((my_ticket_ - curr_) & WR_MASK); if (likely(delta == 0)) break; -- cgit v1.2.3 From c2f6805d470af369a7337801deeecea800dbfe1c Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Mon, 15 Nov 2010 19:32:42 +0100 Subject: blk-throttle: Fix calculation of max number of WRITES to be dispatched o Currently we try to dispatch more READS and fewer WRITES (75%, 25%) in one dispatch round. ummy pointed out that there is a bug in max_nr_writes calculation. This patch fixes it. Reported-by: ummy y Signed-off-by: Vivek Goyal Signed-off-by: Jens Axboe --- block/blk-throttle.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/blk-throttle.c b/block/blk-throttle.c index 56ad4531b412..004be80fd894 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -645,7 +645,7 @@ static int throtl_dispatch_tg(struct throtl_data *td, struct throtl_grp *tg, { unsigned int nr_reads = 0, nr_writes = 0; unsigned int max_nr_reads = throtl_grp_quantum*3/4; - unsigned int max_nr_writes = throtl_grp_quantum - nr_reads; + unsigned int max_nr_writes = throtl_grp_quantum - max_nr_reads; struct bio *bio; /* Try to dispatch 75% READS and 25% WRITES */ -- cgit v1.2.3 From 3e9bb2a071614f1d185740f31ac503ecba11d783 Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Mon, 15 Nov 2010 19:32:43 +0100 Subject: block: fix amiga and atari floppy driver compile warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Geert, my crosstool doesn't produce the warning below. I guess this has something to do with the compiler version. - Geert noticed the following warning during compilation. drivers/block/amiflop.c:1344: warning: ‘rq’ may be used uninitialized in this function drivers/block/ataflop.c:1402: warning: ‘rq’ may be used uninitialized in this function - Initialize rq to NULL to fix the warning. If we can't find a suitable request to dispatch, this function should return NULL instead of a possibly-garbage pointer. - Cross-compile tested only. Don't have hardware to test it.
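The shape of the bug, as a minimal sketch (hypothetical helper names, not the floppy code itself):

	static struct request *set_next_request_sketch(void)
	{
		struct request *rq = NULL;	/* the fix: well-defined "empty" value */
		int i;

		for (i = 0; i < MAX_UNITS; i++) {
			struct request_queue *q = unit_queue(i);	/* hypothetical */

			if (q)
				rq = fetch_request(q);			/* hypothetical */
			if (rq)
				break;
		}
		return rq;	/* NULL now reliably means "nothing to dispatch" */
	}

If no unit yields a request, the uninitialized variant returns whatever happened to be on the stack and the caller dereferences garbage; initializing rq to NULL makes the no-request case explicit.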
Reported-by: Geert Uytterhoeven Signed-off-by: Vivek Goyal Signed-off-by: Jens Axboe --- drivers/block/amiflop.c | 2 +- drivers/block/ataflop.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/block/amiflop.c b/drivers/block/amiflop.c index a1725e6488d3..7888501ad9ee 100644 --- a/drivers/block/amiflop.c +++ b/drivers/block/amiflop.c @@ -1341,7 +1341,7 @@ static struct request *set_next_request(void) { struct request_queue *q; int cnt = FD_MAX_UNITS; - struct request *rq; + struct request *rq = NULL; /* Find next queue we can dispatch from */ fdc_queue = fdc_queue + 1; diff --git a/drivers/block/ataflop.c b/drivers/block/ataflop.c index 4e4cc6c828cb..605a67e40bbf 100644 --- a/drivers/block/ataflop.c +++ b/drivers/block/ataflop.c @@ -1399,7 +1399,7 @@ static struct request *set_next_request(void) { struct request_queue *q; int old_pos = fdc_queue; - struct request *rq; + struct request *rq = NULL; do { q = unit[fdc_queue].disk->queue; -- cgit v1.2.3 From 902bca00dc6e3b3ff5fbb1e32e5dbb45d5f30579 Mon Sep 17 00:00:00 2001 From: Stefan Richter Date: Sat, 6 Nov 2010 12:36:13 +0100 Subject: firewire: net: count stats.tx_packets and stats.tx_bytes Signed-off-by: Stefan Richter --- drivers/firewire/net.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/firewire/net.c b/drivers/firewire/net.c index 18fdd9703b48..e2e968e732bc 100644 --- a/drivers/firewire/net.c +++ b/drivers/firewire/net.c @@ -906,6 +906,7 @@ static int fwnet_send_packet(struct fwnet_packet_task *ptask); static void fwnet_transmit_packet_done(struct fwnet_packet_task *ptask) { struct fwnet_device *dev = ptask->dev; + struct sk_buff *skb = ptask->skb; unsigned long flags; bool free; @@ -916,8 +917,11 @@ static void fwnet_transmit_packet_done(struct fwnet_packet_task *ptask) /* Check whether we or the networking TX soft-IRQ is last user. */ free = (ptask->outstanding_pkts == 0 && !list_empty(&ptask->pt_link)); - if (ptask->outstanding_pkts == 0) + if (ptask->outstanding_pkts == 0) { list_del(&ptask->pt_link); + dev->netdev->stats.tx_packets++; + dev->netdev->stats.tx_bytes += skb->len; + } spin_unlock_irqrestore(&dev->lock, flags); @@ -926,7 +930,6 @@ static void fwnet_transmit_packet_done(struct fwnet_packet_task *ptask) u16 fg_off; u16 datagram_label; u16 lf; - struct sk_buff *skb; /* Update the ptask to point to the next fragment and send it */ lf = fwnet_get_hdr_lf(&ptask->hdr); @@ -953,7 +956,7 @@ static void fwnet_transmit_packet_done(struct fwnet_packet_task *ptask) datagram_label = fwnet_get_hdr_dgl(&ptask->hdr); break; } - skb = ptask->skb; + skb_pull(skb, ptask->max_payload); if (ptask->outstanding_pkts > 1) { fwnet_make_sf_hdr(&ptask->hdr, RFC2374_HDR_INTFRAG, -- cgit v1.2.3 From 7ee11fa8d0a84b05cefe12b0bebc05ab0ea89cd6 Mon Sep 17 00:00:00 2001 From: Stefan Richter Date: Sat, 6 Nov 2010 16:57:28 +0100 Subject: firewire: net: fix memory leaks a) fwnet_transmit_packet_done used to poison ptask->pt_link by list_del. If fwnet_send_packet checked later whether it was responsible for cleaning up (in the border case that the TX soft IRQ was outpaced by the AT-req tasklet on another CPU), it missed this because ptask->pt_link was no longer shown as empty. b) If fwnet_write_complete got an rcode other than RCODE_COMPLETE, we failed to free the skb and ptask entirely. Also, count stats.tx_dropped and stats.tx_errors when rcode != 0.
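The invariant both fixes restore, in sketch form: exactly one of the two contexts (networking TX soft-IRQ vs. AT-request tasklet) may free a ptask, and the list linkage is the tie-breaker, so only the winning side may tear it down.

	/* completion side, under dev->lock (sketch of the corrected logic) */
	free = (ptask->outstanding_pkts == 0 && !list_empty(&ptask->pt_link));
	if (free)
		list_del(&ptask->pt_link);	/* only the last user unlinks */
	...
	if (free)
		fwnet_free_ptask(ptask);	/* frees skb and ptask exactly once */

Unlinking unconditionally, as the old code did, made the other side's emptiness test lie -- that is bug (a); bug (b) was having no free path at all for failed rcodes.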
Signed-off-by: Stefan Richter --- drivers/firewire/net.c | 35 +++++++++++++++++++++++++++++++---- 1 file changed, 31 insertions(+), 4 deletions(-) diff --git a/drivers/firewire/net.c b/drivers/firewire/net.c index e2e968e732bc..3a27cee5bf26 100644 --- a/drivers/firewire/net.c +++ b/drivers/firewire/net.c @@ -916,9 +916,10 @@ static void fwnet_transmit_packet_done(struct fwnet_packet_task *ptask) /* Check whether we or the networking TX soft-IRQ is last user. */ free = (ptask->outstanding_pkts == 0 && !list_empty(&ptask->pt_link)); + if (free) + list_del(&ptask->pt_link); if (ptask->outstanding_pkts == 0) { - list_del(&ptask->pt_link); dev->netdev->stats.tx_packets++; dev->netdev->stats.tx_bytes += skb->len; } @@ -973,6 +974,31 @@ static void fwnet_transmit_packet_done(struct fwnet_packet_task *ptask) fwnet_free_ptask(ptask); } +static void fwnet_transmit_packet_failed(struct fwnet_packet_task *ptask) +{ + struct fwnet_device *dev = ptask->dev; + unsigned long flags; + bool free; + + spin_lock_irqsave(&dev->lock, flags); + + /* One fragment failed; don't try to send remaining fragments. */ + ptask->outstanding_pkts = 0; + + /* Check whether we or the networking TX soft-IRQ is last user. */ + free = !list_empty(&ptask->pt_link); + if (free) + list_del(&ptask->pt_link); + + dev->netdev->stats.tx_dropped++; + dev->netdev->stats.tx_errors++; + + spin_unlock_irqrestore(&dev->lock, flags); + + if (free) + fwnet_free_ptask(ptask); +} + static void fwnet_write_complete(struct fw_card *card, int rcode, void *payload, size_t length, void *data) { @@ -980,11 +1006,12 @@ static void fwnet_write_complete(struct fw_card *card, int rcode, ptask = data; - if (rcode == RCODE_COMPLETE) + if (rcode == RCODE_COMPLETE) { fwnet_transmit_packet_done(ptask); - else + } else { fw_error("fwnet_write_complete: failed: %x\n", rcode); - /* ??? error recovery */ + fwnet_transmit_packet_failed(ptask); + } } static int fwnet_send_packet(struct fwnet_packet_task *ptask) -- cgit v1.2.3 From 48553011cea504796e513350740781ac6745f556 Mon Sep 17 00:00:00 2001 From: Stefan Richter Date: Sat, 6 Nov 2010 23:18:23 +0100 Subject: firewire: net: replace lists by counters The current transmit code does not at all make use of - fwnet_device.packet_list and only very limited use of - fwnet_device.broadcasted_list, - fwnet_device.queued_packets. Their current function is to track whether the TX soft-IRQ finished dealing with an skb when the AT-req tasklet takes over, and to discard pending tx datagrams (if there are any) when the local node is removed. The latter does actually contain a race condition bug with TX soft-IRQ and AT-req tasklet. Instead of these lists and the corresponding link in fwnet_packet_task, - a flag in fwnet_packet_task to track whether fwnet_tx is done, - a counter of queued datagrams in fwnet_device do the job as well. The above-mentioned theoretical race condition is resolved by letting fwnet_remove sleep until all datagrams have been flushed. It may sleep almost arbitrarily long since fwnet_remove is executed in the context of a multithreaded (concurrency managed) workqueue. The type of max_payload is changed to u16 here to avoid waste in struct fwnet_packet_task. This value cannot exceed 4096 per IEEE 1394:2008 table 16-18 (or 32768 per specification of packet headers, if there is ever going to be something other than beta mode).
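Condensed, the replacement bookkeeping looks like this (sketch; the real hunks follow):

	/* submit side (TX soft-IRQ), under dev->lock, after the send */
	free = (ptask->outstanding_pkts == 0 && !ptask->enqueued);
	if (!free)
		ptask->enqueued = true;		/* completion side will clean up */
	else
		dev->queued_datagrams--;	/* completion already ran; we clean up */

	/* fwnet_remove: wait for the counter to drain instead of walking lists */
	for (i = 0; dev->queued_datagrams && i < 5; i++)
		ssleep(1);

A single counter plus a per-ptask flag answers the only two questions the three lists were answering: who frees this ptask, and is anything still in flight.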
Signed-off-by: Stefan Richter --- drivers/firewire/net.c | 73 ++++++++++++++++++-------------------------------- 1 file changed, 26 insertions(+), 47 deletions(-) diff --git a/drivers/firewire/net.c b/drivers/firewire/net.c index 3a27cee5bf26..d2d488b9cd96 100644 --- a/drivers/firewire/net.c +++ b/drivers/firewire/net.c @@ -7,6 +7,7 @@ */ #include +#include #include #include #include @@ -169,15 +170,8 @@ struct fwnet_device { struct fw_address_handler handler; u64 local_fifo; - /* List of packets to be sent */ - struct list_head packet_list; - /* - * List of packets that were broadcasted. When we get an ISO interrupt - * one of them has been sent - */ - struct list_head broadcasted_list; - /* List of packets that have been sent but not yet acked */ - struct list_head sent_list; + /* Number of tx datagrams that have been queued but not yet acked */ + int queued_datagrams; struct list_head peer_list; struct fw_card *card; @@ -195,7 +189,7 @@ struct fwnet_peer { unsigned pdg_size; /* pd_list size */ u16 datagram_label; /* outgoing datagram label */ - unsigned max_payload; /* includes RFC2374_FRAG_HDR_SIZE overhead */ + u16 max_payload; /* includes RFC2374_FRAG_HDR_SIZE overhead */ int node_id; int generation; unsigned speed; @@ -203,22 +197,18 @@ struct fwnet_peer { /* This is our task struct. It's used for the packet complete callback. */ struct fwnet_packet_task { - /* - * ptask can actually be on dev->packet_list, dev->broadcasted_list, - * or dev->sent_list depending on its current state. - */ - struct list_head pt_link; struct fw_transaction transaction; struct rfc2734_header hdr; struct sk_buff *skb; struct fwnet_device *dev; int outstanding_pkts; - unsigned max_payload; u64 fifo_addr; u16 dest_node; + u16 max_payload; u8 generation; u8 speed; + u8 enqueued; }; /* @@ -915,9 +905,9 @@ static void fwnet_transmit_packet_done(struct fwnet_packet_task *ptask) ptask->outstanding_pkts--; /* Check whether we or the networking TX soft-IRQ is last user. */ - free = (ptask->outstanding_pkts == 0 && !list_empty(&ptask->pt_link)); + free = (ptask->outstanding_pkts == 0 && ptask->enqueued); if (free) - list_del(&ptask->pt_link); + dev->queued_datagrams--; if (ptask->outstanding_pkts == 0) { dev->netdev->stats.tx_packets++; @@ -986,9 +976,9 @@ static void fwnet_transmit_packet_failed(struct fwnet_packet_task *ptask) ptask->outstanding_pkts = 0; /* Check whether we or the networking TX soft-IRQ is last user. */ - free = !list_empty(&ptask->pt_link); + free = ptask->enqueued; if (free) - list_del(&ptask->pt_link); + dev->queued_datagrams--; dev->netdev->stats.tx_dropped++; dev->netdev->stats.tx_errors++; @@ -1069,9 +1059,11 @@ static int fwnet_send_packet(struct fwnet_packet_task *ptask) spin_lock_irqsave(&dev->lock, flags); /* If the AT tasklet already ran, we may be last user. */ - free = (ptask->outstanding_pkts == 0 && list_empty(&ptask->pt_link)); + free = (ptask->outstanding_pkts == 0 && !ptask->enqueued); if (!free) - list_add_tail(&ptask->pt_link, &dev->broadcasted_list); + ptask->enqueued = true; + else + dev->queued_datagrams--; spin_unlock_irqrestore(&dev->lock, flags); @@ -1086,9 +1078,11 @@ static int fwnet_send_packet(struct fwnet_packet_task *ptask) spin_lock_irqsave(&dev->lock, flags); /* If the AT tasklet already ran, we may be last user. 
*/ free = (ptask->outstanding_pkts == 0 && !ptask->enqueued); if (!free) ptask->enqueued = true; else dev->queued_datagrams--; spin_unlock_irqrestore(&dev->lock, flags); @@ -1350,10 +1344,12 @@ static netdev_tx_t fwnet_tx(struct sk_buff *skb, struct net_device *net) max_payload += RFC2374_FRAG_HDR_SIZE; } + dev->queued_datagrams++; + spin_unlock_irqrestore(&dev->lock, flags); ptask->max_payload = max_payload; - INIT_LIST_HEAD(&ptask->pt_link); + ptask->enqueued = 0; fwnet_send_packet(ptask); @@ -1487,14 +1483,9 @@ static int fwnet_probe(struct device *_dev) dev->broadcast_rcv_context = NULL; dev->broadcast_xmt_max_payload = 0; dev->broadcast_xmt_datagramlabel = 0; - dev->local_fifo = FWNET_NO_FIFO_ADDR; - - INIT_LIST_HEAD(&dev->packet_list); - INIT_LIST_HEAD(&dev->broadcasted_list); - INIT_LIST_HEAD(&dev->sent_list); + dev->queued_datagrams = 0; INIT_LIST_HEAD(&dev->peer_list); - dev->card = card; dev->netdev = net; @@ -1552,7 +1543,7 @@ static int fwnet_remove(struct device *_dev) struct fwnet_peer *peer = dev_get_drvdata(_dev); struct fwnet_device *dev = peer->dev; struct net_device *net; - struct fwnet_packet_task *ptask, *pt_next; + int i; mutex_lock(&fwnet_device_mutex); @@ -1570,21 +1561,9 @@ static int fwnet_remove(struct device *_dev) dev->card); fw_iso_context_destroy(dev->broadcast_rcv_context); } - list_for_each_entry_safe(ptask, pt_next, - &dev->packet_list, pt_link) { - dev_kfree_skb_any(ptask->skb); - kmem_cache_free(fwnet_packet_task_cache, ptask); - } - list_for_each_entry_safe(ptask, pt_next, - &dev->broadcasted_list, pt_link) { - dev_kfree_skb_any(ptask->skb); - kmem_cache_free(fwnet_packet_task_cache, ptask); - } - list_for_each_entry_safe(ptask, pt_next, - &dev->sent_list, pt_link) { - dev_kfree_skb_any(ptask->skb); - kmem_cache_free(fwnet_packet_task_cache, ptask); - } + for (i = 0; dev->queued_datagrams && i < 5; i++) + ssleep(1); + WARN_ON(dev->queued_datagrams); list_del(&dev->dev_link); free_netdev(net); -- cgit v1.2.3 From b2268830f5cf29d94b3e4a2af0b795a8f28776fe Mon Sep 17 00:00:00 2001 From: Stefan Richter Date: Sun, 14 Nov 2010 14:35:40 +0100 Subject: firewire: net: throttle TX queue before running out of tlabels This prevents firewire-net from submitting write requests in fast succession until they fail because all 64 transaction labels are used up by unfinished split transactions. The netif_stop/wake_queue API is used for this purpose. Without this stop/wake mechanism, datagrams were simply lost whenever the tlabel pool was exhausted. Plus, tlabel exhaustion by firewire-net also prevented other unrelated outbound transactions from being initiated. I checked that the chosen queue depth hits the maximum possible throughput with an OS X peer whose receive DMA is good enough to never reject requests due to a busy inbound request FIFO. Current Linux peers show a mixed picture of -5%...+15% change in bandwidth; their current bottleneck is RCODE_BUSY situations (fewer or more, depending on TX queue depth) due to a too-small AR buffer in firewire-ohci. Maxim Levitsky tested this change with similar watermarks with a Linux peer and some pending firewire-ohci improvements that address the RCODE_BUSY problem and confirmed that these TX queue limits are good. Note: This removes some netif_wake_queue from reception code paths. They were apparently copy&paste artefacts from a nonsensical netif_wake_queue use in the older eth1394 driver.
This belongs only into the transmit path. Signed-off-by: Stefan Richter Tested-by: Maxim Levitsky --- drivers/firewire/net.c | 59 ++++++++++++++++++++++++++++++-------------------- 1 file changed, 35 insertions(+), 24 deletions(-) diff --git a/drivers/firewire/net.c b/drivers/firewire/net.c index d2d488b9cd96..1a467a91fb0b 100644 --- a/drivers/firewire/net.c +++ b/drivers/firewire/net.c @@ -27,8 +27,14 @@ #include #include -#define FWNET_MAX_FRAGMENTS 25 /* arbitrary limit */ -#define FWNET_ISO_PAGE_COUNT (PAGE_SIZE < 16 * 1024 ? 4 : 2) +/* rx limits */ +#define FWNET_MAX_FRAGMENTS 30 /* arbitrary, > TX queue depth */ +#define FWNET_ISO_PAGE_COUNT (PAGE_SIZE < 16*1024 ? 4 : 2) + +/* tx limits */ +#define FWNET_MAX_QUEUED_DATAGRAMS 20 /* < 64 = number of tlabels */ +#define FWNET_MIN_QUEUED_DATAGRAMS 10 /* should keep AT DMA busy enough */ +#define FWNET_TX_QUEUE_LEN FWNET_MAX_QUEUED_DATAGRAMS /* ? */ #define IEEE1394_BROADCAST_CHANNEL 31 #define IEEE1394_ALL_NODES (0xffc0 | 0x003f) @@ -640,8 +646,6 @@ static int fwnet_finish_incoming_packet(struct net_device *net, net->stats.rx_packets++; net->stats.rx_bytes += skb->len; } - if (netif_queue_stopped(net)) - netif_wake_queue(net); return 0; @@ -650,8 +654,6 @@ static int fwnet_finish_incoming_packet(struct net_device *net, net->stats.rx_dropped++; dev_kfree_skb_any(skb); - if (netif_queue_stopped(net)) - netif_wake_queue(net); return -ENOENT; } @@ -783,15 +785,10 @@ static int fwnet_incoming_packet(struct fwnet_device *dev, __be32 *buf, int len, * Datagram is not complete, we're done for the * moment. */ - spin_unlock_irqrestore(&dev->lock, flags); - - return 0; + retval = 0; fail: spin_unlock_irqrestore(&dev->lock, flags); - if (netif_queue_stopped(net)) - netif_wake_queue(net); - return retval; } @@ -891,6 +888,13 @@ static void fwnet_free_ptask(struct fwnet_packet_task *ptask) kmem_cache_free(fwnet_packet_task_cache, ptask); } +/* Caller must hold dev->lock. */ +static void dec_queued_datagrams(struct fwnet_device *dev) +{ + if (--dev->queued_datagrams == FWNET_MIN_QUEUED_DATAGRAMS) + netif_wake_queue(dev->netdev); +} + static int fwnet_send_packet(struct fwnet_packet_task *ptask); static void fwnet_transmit_packet_done(struct fwnet_packet_task *ptask) @@ -907,7 +911,7 @@ static void fwnet_transmit_packet_done(struct fwnet_packet_task *ptask) /* Check whether we or the networking TX soft-IRQ is last user. */ free = (ptask->outstanding_pkts == 0 && ptask->enqueued); if (free) - dev->queued_datagrams--; + dec_queued_datagrams(dev); if (ptask->outstanding_pkts == 0) { dev->netdev->stats.tx_packets++; @@ -978,7 +982,7 @@ static void fwnet_transmit_packet_failed(struct fwnet_packet_task *ptask) /* Check whether we or the networking TX soft-IRQ is last user. 
*/ free = ptask->enqueued; if (free) - dev->queued_datagrams--; + dec_queued_datagrams(dev); dev->netdev->stats.tx_dropped++; dev->netdev->stats.tx_errors++; @@ -1063,7 +1067,7 @@ static int fwnet_send_packet(struct fwnet_packet_task *ptask) if (!free) ptask->enqueued = true; else - dev->queued_datagrams--; + dec_queued_datagrams(dev); spin_unlock_irqrestore(&dev->lock, flags); @@ -1082,7 +1086,7 @@ static int fwnet_send_packet(struct fwnet_packet_task *ptask) if (!free) ptask->enqueued = true; else - dev->queued_datagrams--; + dec_queued_datagrams(dev); spin_unlock_irqrestore(&dev->lock, flags); @@ -1248,6 +1252,15 @@ static netdev_tx_t fwnet_tx(struct sk_buff *skb, struct net_device *net) struct fwnet_peer *peer; unsigned long flags; + spin_lock_irqsave(&dev->lock, flags); + + /* Can this happen? */ + if (netif_queue_stopped(dev->netdev)) { + spin_unlock_irqrestore(&dev->lock, flags); + + return NETDEV_TX_BUSY; + } + ptask = kmem_cache_alloc(fwnet_packet_task_cache, GFP_ATOMIC); if (ptask == NULL) goto fail; @@ -1266,9 +1279,6 @@ static netdev_tx_t fwnet_tx(struct sk_buff *skb, struct net_device *net) proto = hdr_buf.h_proto; dg_size = skb->len; - /* serialize access to peer, including peer->datagram_label */ - spin_lock_irqsave(&dev->lock, flags); - /* * Set the transmission type for the packet. ARP packets and IP * broadcast packets are sent via GASP. @@ -1290,7 +1300,7 @@ static netdev_tx_t fwnet_tx(struct sk_buff *skb, struct net_device *net) peer = fwnet_peer_find_by_guid(dev, be64_to_cpu(guid)); if (!peer || peer->fifo == FWNET_NO_FIFO_ADDR) - goto fail_unlock; + goto fail; generation = peer->generation; dest_node = peer->node_id; @@ -1344,7 +1354,8 @@ static netdev_tx_t fwnet_tx(struct sk_buff *skb, struct net_device *net) max_payload += RFC2374_FRAG_HDR_SIZE; } - dev->queued_datagrams++; + if (++dev->queued_datagrams == FWNET_MAX_QUEUED_DATAGRAMS) + netif_stop_queue(dev->netdev); spin_unlock_irqrestore(&dev->lock, flags); @@ -1355,9 +1366,9 @@ static netdev_tx_t fwnet_tx(struct sk_buff *skb, struct net_device *net) return NETDEV_TX_OK; - fail_unlock: - spin_unlock_irqrestore(&dev->lock, flags); fail: + spin_unlock_irqrestore(&dev->lock, flags); + if (ptask) kmem_cache_free(fwnet_packet_task_cache, ptask); @@ -1403,7 +1414,7 @@ static void fwnet_init_dev(struct net_device *net) net->addr_len = FWNET_ALEN; net->hard_header_len = FWNET_HLEN; net->type = ARPHRD_IEEE1394; - net->tx_queue_len = 10; + net->tx_queue_len = FWNET_TX_QUEUE_LEN; } /* caller must hold fwnet_device_mutex */ -- cgit v1.2.3 From bbe425cd9ae83eacd0c9f09df2bf56dc911a54cd Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 17 Nov 2010 11:56:13 +0100 Subject: cciss: fix build for PROC_FS disabled The recent patch to fix the removal of a non-existing proc directory introduced this build problem for !CONFIG_PROC_FS: drivers/block/cciss.c:4929: error: 'proc_cciss' undeclared (first use in this function) Fix it by moving proc_cciss outside of the CONFIG_PROC_FS scope. 
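The fix follows a common kernel pattern, sketched below in simplified form (the init helper shown is illustrative, not the driver's exact code): declare the variable unconditionally so that every reference compiles, and confine only the /proc code itself to CONFIG_PROC_FS.

	static struct proc_dir_entry *proc_cciss;	/* visible in all configs */

	#ifdef CONFIG_PROC_FS
	static void cciss_procinit(ctlr_info_t *h)
	{
		if (proc_cciss == NULL)
			proc_cciss = proc_mkdir("driver/cciss", NULL);
		/* ... create the per-controller seq_file entry ... */
	}
	#else
	static void cciss_procinit(ctlr_info_t *h) {}
	#endif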
Reported-by: Randy Dunlap Signed-off-by: Jens Axboe --- drivers/block/cciss.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c index a67d0a611a8a..f291587d753e 100644 --- a/drivers/block/cciss.c +++ b/drivers/block/cciss.c @@ -66,6 +66,7 @@ MODULE_VERSION("3.6.26"); MODULE_LICENSE("GPL"); static DEFINE_MUTEX(cciss_mutex); +static struct proc_dir_entry *proc_cciss; #include "cciss_cmd.h" #include "cciss.h" @@ -363,8 +364,6 @@ static const char *raid_label[] = { "0", "4", "1(1+0)", "5", "5+1", "ADG", #define ENG_GIG_FACTOR (ENG_GIG/512) #define ENGAGE_SCSI "engage scsi" -static struct proc_dir_entry *proc_cciss; - static void cciss_seq_show_header(struct seq_file *seq) { ctlr_info_t *h = seq->private; -- cgit v1.2.3 From b432b4b3440a34c1430fcd66bab783640724bd28 Mon Sep 17 00:00:00 2001 From: kishore kadiyala Date: Wed, 17 Nov 2010 22:35:32 -0500 Subject: mmc: omap4: hsmmc: Fix improper card detection while booting While booting OMAP4 ES2.0 boards, cards on the MMC1 and MMC2 controllers sometimes go undetected. During the command/data line reset, the host structure pointer rather than the controller base address was passed to the SYSCTL register read, defeating the updated reset logic. Passing the correct base address fixes the issue. Signed-off-by: Kishore Kadiyala Acked-by: Felipe Balbi Acked-by: Madhusudhan Chikkature Acked-by: Tony Lindgren Signed-off-by: Chris Ball --- drivers/mmc/host/omap_hsmmc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mmc/host/omap_hsmmc.c b/drivers/mmc/host/omap_hsmmc.c index 82a1079bbdc7..5d46021cbb57 100644 --- a/drivers/mmc/host/omap_hsmmc.c +++ b/drivers/mmc/host/omap_hsmmc.c @@ -1002,7 +1002,7 @@ static inline void omap_hsmmc_reset_controller_fsm(struct omap_hsmmc_host *host, * Monitor a 0->1 transition first */ if (mmc_slot(host).features & HSMMC_HAS_UPDATED_RESET) { - while ((!(OMAP_HSMMC_READ(host, SYSCTL) & bit)) + while ((!(OMAP_HSMMC_READ(host->base, SYSCTL) & bit)) && (i++ < limit)) cpu_relax(); } -- cgit v1.2.3 From 4d048435e9864998f6a6ad16422393d42322716d Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Wed, 17 Nov 2010 11:44:00 +0000 Subject: ARM: mach-shmobile: sh7372 USB0/IIC1 MSTP fix Fix a MSTP assignment problem in the sh7372 clock framework code. The USB drivers should attach to MSTP322, not MSTP323 where IIC1 is located.
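To see what the bad table entry meant in practice, an illustrative sketch (the device pointer is hypothetical and stands in for the registered "r8a66597_hcd.0" platform device):

	/* clkdev matches the USB0 host device to its MSTP gate clock */
	struct clk *clk = clk_get(dev, NULL);	/* dev = "r8a66597_hcd.0" */

	clk_enable(clk);	/* before the fix: toggled MSTP323, the IIC1
				   gate, and left the real USB0 gate
				   (MSTP322) untouched */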
Signed-off-by: Magnus Damm Signed-off-by: Paul Mundt --- arch/arm/mach-shmobile/clock-sh7372.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/mach-shmobile/clock-sh7372.c b/arch/arm/mach-shmobile/clock-sh7372.c index 7db31e6c6bf2..d3313a95dbfc 100644 --- a/arch/arm/mach-shmobile/clock-sh7372.c +++ b/arch/arm/mach-shmobile/clock-sh7372.c @@ -647,8 +647,8 @@ static struct clk_lookup lookups[] = { CLKDEV_DEV_ID("sh_cmt.10", &mstp_clks[MSTP329]), /* CMT10 */ CLKDEV_DEV_ID("sh_fsi2", &mstp_clks[MSTP328]), /* FSI2 */ CLKDEV_DEV_ID("i2c-sh_mobile.1", &mstp_clks[MSTP323]), /* IIC1 */ - CLKDEV_DEV_ID("r8a66597_hcd.0", &mstp_clks[MSTP323]), /* USB0 */ - CLKDEV_DEV_ID("r8a66597_udc.0", &mstp_clks[MSTP323]), /* USB0 */ + CLKDEV_DEV_ID("r8a66597_hcd.0", &mstp_clks[MSTP322]), /* USB0 */ + CLKDEV_DEV_ID("r8a66597_udc.0", &mstp_clks[MSTP322]), /* USB0 */ CLKDEV_DEV_ID("sh_mobile_sdhi.0", &mstp_clks[MSTP314]), /* SDHI0 */ CLKDEV_DEV_ID("sh_mobile_sdhi.1", &mstp_clks[MSTP313]), /* SDHI1 */ CLKDEV_DEV_ID("sh_mmcif.0", &mstp_clks[MSTP312]), /* MMC */ -- cgit v1.2.3 From 0e2af2a9abf94b408ff70679b692a8644fed4aab Mon Sep 17 00:00:00 2001 From: Rakib Mullick Date: Fri, 12 Nov 2010 09:50:54 -0500 Subject: x86, hw_nmi: Move backtrace_mask declaration under ARCH_HAS_NMI_WATCHDOG MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit backtrace_mask has been used under the code context of ARCH_HAS_NMI_WATCHDOG. So put it into that context. We were warned by the following warning: arch/x86/kernel/apic/hw_nmi.c:21: warning: ‘backtrace_mask’ defined but not used Signed-off-by: Rakib Mullick Signed-off-by: Don Zickus LKML-Reference: <1289573455-3410-2-git-send-email-dzickus@redhat.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/hw_nmi.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/apic/hw_nmi.c b/arch/x86/kernel/apic/hw_nmi.c index cefd6942f0e9..62f6e1e55b90 100644 --- a/arch/x86/kernel/apic/hw_nmi.c +++ b/arch/x86/kernel/apic/hw_nmi.c @@ -17,15 +17,16 @@ #include #include -/* For reliability, we're prepared to waste bits here. */ -static DECLARE_BITMAP(backtrace_mask, NR_CPUS) __read_mostly; - u64 hw_nmi_get_sample_period(void) { return (u64)(cpu_khz) * 1000 * 60; } #ifdef ARCH_HAS_NMI_WATCHDOG + +/* For reliability, we're prepared to waste bits here. */ +static DECLARE_BITMAP(backtrace_mask, NR_CPUS) __read_mostly; + void arch_trigger_all_cpu_backtrace(void) { int i; -- cgit v1.2.3 From 96e612ffc301372d3a3b94e2cb5d1e0c1c207dd1 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Tue, 16 Nov 2010 13:45:16 +0900 Subject: x86, asm: Fix binutils 2.15 build failure Add parentheses around one pushl_cfi argument. Commit df5d1874 "x86: Use {push,pop}{l,q}_cfi in more places" caused GNU assembler 2.15 (Debian Sarge) to fail. It is still failing as of commit 07bd8516 "x86, asm: Restore parentheses around one pushl_cfi argument". This patch solves build failure with GNU assembler 2.15. 
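The macro hands its argument straight through to pushl, so the extra parentheses change nothing semantically; they only change how the assembler tokenizes the memory operand, which old gas apparently mis-parses when the operand begins with an unparenthesized symbol difference. Roughly, from asm/dwarf2.h:

	.macro pushl_cfi reg
		pushl \reg
		CFI_ADJUST_CFA_OFFSET 4
	.endm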
Signed-off-by: Tetsuo Handa Acked-by: Jan Beulich Cc: heukelum@fastmail.fm Cc: hpa@linux.intel.com LKML-Reference: <201011160445.oAG4jGif079860@www262.sakura.ne.jp> Signed-off-by: Ingo Molnar --- arch/x86/kernel/entry_32.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index 59e175e89599..591e60104278 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -395,7 +395,7 @@ sysenter_past_esp: * A tiny bit of offset fixup is necessary - 4*4 means the 4 words * pushed above; +8 corresponds to copy_thread's esp0 setting. */ - pushl_cfi (TI_sysenter_return-THREAD_SIZE_asm+8+4*4)(%esp) + pushl_cfi ((TI_sysenter_return)-THREAD_SIZE_asm+8+4*4)(%esp) CFI_REL_OFFSET eip, 0 pushl_cfi %eax -- cgit v1.2.3 From 9223081f54e3dc5045fe41a475165d9003c9a779 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sat, 13 Nov 2010 10:52:09 -0800 Subject: x86: Use online node real index in calculate_tlb_offset() Found a NUMA system that has no RAM installed at the first socket and hangs while executing init scripts. Bisected it to: | commit 932967202182743c01a2eee4bdfa2c42697bc586 | Author: Shaohua Li | Date: Wed Oct 20 11:07:03 2010 +0800 | | x86: Spread tlb flush vector between nodes It turns out that when the first socket is not online, CPUs on node 1 can get a tlb_offset bigger than NUM_INVALIDATE_TLB_VECTORS. The same can happen on, for example, a 4-socket system with socket 2 unpopulated: socket 3 then gets too big a tlb_offset. We need to use the real online node index. Signed-off-by: Yinghai Lu Acked-by: Shaohua Li Cc: Linus Torvalds LKML-Reference: <4CDEDE59.40603@kernel.org> Signed-off-by: Ingo Molnar --- arch/x86/mm/tlb.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 12cdbb17ad18..6acc724d5d8f 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -223,7 +223,7 @@ void native_flush_tlb_others(const struct cpumask *cpumask, static void __cpuinit calculate_tlb_offset(void) { - int cpu, node, nr_node_vecs; + int cpu, node, nr_node_vecs, idx = 0; /* * we are changing tlb_vector_offset for each CPU in runtime, but this * will not cause inconsistency, as the write is atomic under X86. we @@ -239,7 +239,7 @@ static void __cpuinit calculate_tlb_offset(void) nr_node_vecs = NUM_INVALIDATE_TLB_VECTORS/nr_online_nodes; for_each_online_node(node) { - int node_offset = (node % NUM_INVALIDATE_TLB_VECTORS) * + int node_offset = (idx % NUM_INVALIDATE_TLB_VECTORS) * nr_node_vecs; int cpu_offset = 0; for_each_cpu(cpu, cpumask_of_node(node)) { @@ -248,6 +248,7 @@ static void __cpuinit calculate_tlb_offset(void) cpu_offset++; cpu_offset = cpu_offset % nr_node_vecs; } + idx++; } } -- cgit v1.2.3 From 8191c9f69202d4dbc66063cb92059b8a58640d34 Mon Sep 17 00:00:00 2001 From: Dimitri Sivanich Date: Tue, 16 Nov 2010 16:23:52 -0600 Subject: x86: UV: Address interrupt/IO port operation conflict This patch for SGI UV systems addresses a problem whereby interrupt transactions being looped back from a local IOH, through the hub to a local CPU can (erroneously) conflict with IO port operations and other transactions. To work around this we set a high bit in the APIC IDs used for interrupts. This bit appears to be ignored by the sockets, but it avoids the conflict in the hub.
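Condensed from the diff that follows: the workaround reads a BIOS-owned mask register once during boot and then ORs those bits into every APIC ID that gets programmed as an interrupt destination:

	/* boot (uv_set_apicid_hibit): capture the BIOS-dictated bits */
	uv_apicid_hibits = apicid_mask.s.bit_enables & UV_APICID_HIBIT_MASK;

	/* every destination-APIC-ID user, e.g. uv_cpu_mask_to_apicid(): */
	return per_cpu(x86_cpu_to_apicid, cpu) | uv_apicid_hibits;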
Signed-off-by: Dimitri Sivanich LKML-Reference: <20101116222352.GA8155@sgi.com> Signed-off-by: Ingo Molnar ___ arch/x86/include/asm/uv/uv_hub.h | 4 ++++ arch/x86/include/asm/uv/uv_mmrs.h | 19 ++++++++++++++++++- arch/x86/kernel/apic/x2apic_uv_x.c | 25 +++++++++++++++++++++++-- arch/x86/platform/uv/tlb_uv.c | 2 +- arch/x86/platform/uv/uv_time.c | 4 +++- 5 files changed, 49 insertions(+), 5 deletions(-) --- arch/x86/include/asm/uv/uv_hub.h | 4 ++++ arch/x86/include/asm/uv/uv_mmrs.h | 19 ++++++++++++++++++- arch/x86/kernel/apic/x2apic_uv_x.c | 25 +++++++++++++++++++++++-- arch/x86/platform/uv/tlb_uv.c | 2 +- arch/x86/platform/uv/uv_time.c | 4 +++- 5 files changed, 49 insertions(+), 5 deletions(-) diff --git a/arch/x86/include/asm/uv/uv_hub.h b/arch/x86/include/asm/uv/uv_hub.h index e969f691cbfd..a501741c2335 100644 --- a/arch/x86/include/asm/uv/uv_hub.h +++ b/arch/x86/include/asm/uv/uv_hub.h @@ -199,6 +199,8 @@ union uvh_apicid { #define UVH_APICID 0x002D0E00L #define UV_APIC_PNODE_SHIFT 6 +#define UV_APICID_HIBIT_MASK 0xffff0000 + /* Local Bus from cpu's perspective */ #define LOCAL_BUS_BASE 0x1c00000 #define LOCAL_BUS_SIZE (4 * 1024 * 1024) @@ -491,8 +493,10 @@ static inline void uv_set_cpu_scir_bits(int cpu, unsigned char value) } } +extern unsigned int uv_apicid_hibits; static unsigned long uv_hub_ipi_value(int apicid, int vector, int mode) { + apicid |= uv_apicid_hibits; return (1UL << UVH_IPI_INT_SEND_SHFT) | ((apicid) << UVH_IPI_INT_APIC_ID_SHFT) | (mode << UVH_IPI_INT_DELIVERY_MODE_SHFT) | diff --git a/arch/x86/include/asm/uv/uv_mmrs.h b/arch/x86/include/asm/uv/uv_mmrs.h index 6d90adf4428a..20cafeac7455 100644 --- a/arch/x86/include/asm/uv/uv_mmrs.h +++ b/arch/x86/include/asm/uv/uv_mmrs.h @@ -5,7 +5,7 @@ * * SGI UV MMR definitions * - * Copyright (C) 2007-2008 Silicon Graphics, Inc. All rights reserved. + * Copyright (C) 2007-2010 Silicon Graphics, Inc. All rights reserved. 
*/ #ifndef _ASM_X86_UV_UV_MMRS_H @@ -753,6 +753,23 @@ union uvh_lb_bau_sb_descriptor_base_u { } s; }; +/* ========================================================================= */ +/* UVH_LB_TARGET_PHYSICAL_APIC_ID_MASK */ +/* ========================================================================= */ +#define UVH_LB_TARGET_PHYSICAL_APIC_ID_MASK 0x320130UL +#define UVH_LB_TARGET_PHYSICAL_APIC_ID_MASK_32 0x009f0 + +#define UVH_LB_TARGET_PHYSICAL_APIC_ID_MASK_BIT_ENABLES_SHFT 0 +#define UVH_LB_TARGET_PHYSICAL_APIC_ID_MASK_BIT_ENABLES_MASK 0x00000000ffffffffUL + +union uvh_lb_target_physical_apic_id_mask_u { + unsigned long v; + struct uvh_lb_target_physical_apic_id_mask_s { + unsigned long bit_enables : 32; /* RW */ + unsigned long rsvd_32_63 : 32; /* */ + } s; +}; + /* ========================================================================= */ /* UVH_NODE_ID */ /* ========================================================================= */ diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index 194539aea175..c1c52c341f40 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -44,6 +44,8 @@ static u64 gru_start_paddr, gru_end_paddr; static union uvh_apicid uvh_apicid; int uv_min_hub_revision_id; EXPORT_SYMBOL_GPL(uv_min_hub_revision_id); +unsigned int uv_apicid_hibits; +EXPORT_SYMBOL_GPL(uv_apicid_hibits); static DEFINE_SPINLOCK(uv_nmi_lock); static inline bool is_GRU_range(u64 start, u64 end) @@ -85,6 +87,23 @@ static void __init early_get_apic_pnode_shift(void) uvh_apicid.s.pnode_shift = UV_APIC_PNODE_SHIFT; } +/* + * Add an extra bit as dictated by bios to the destination apicid of + * interrupts potentially passing through the UV HUB. This prevents + * a deadlock between interrupts and IO port operations. 
+ */ +static void __init uv_set_apicid_hibit(void) +{ + union uvh_lb_target_physical_apic_id_mask_u apicid_mask; + unsigned long *mmr; + + mmr = early_ioremap(UV_LOCAL_MMR_BASE | + UVH_LB_TARGET_PHYSICAL_APIC_ID_MASK, sizeof(*mmr)); + apicid_mask.v = *mmr; + early_iounmap(mmr, sizeof(*mmr)); + uv_apicid_hibits = apicid_mask.s.bit_enables & UV_APICID_HIBIT_MASK; +} + static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id) { int nodeid; @@ -102,6 +121,7 @@ static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id) __get_cpu_var(x2apic_extra_bits) = nodeid << (uvh_apicid.s.pnode_shift - 1); uv_system_type = UV_NON_UNIQUE_APIC; + uv_set_apicid_hibit(); return 1; } } @@ -155,6 +175,7 @@ static int __cpuinit uv_wakeup_secondary(int phys_apicid, unsigned long start_ri int pnode; pnode = uv_apicid_to_pnode(phys_apicid); + phys_apicid |= uv_apicid_hibits; val = (1UL << UVH_IPI_INT_SEND_SHFT) | (phys_apicid << UVH_IPI_INT_APIC_ID_SHFT) | ((start_rip << UVH_IPI_INT_VECTOR_SHFT) >> 12) | @@ -236,7 +257,7 @@ static unsigned int uv_cpu_mask_to_apicid(const struct cpumask *cpumask) int cpu = cpumask_first(cpumask); if ((unsigned)cpu < nr_cpu_ids) - return per_cpu(x86_cpu_to_apicid, cpu); + return per_cpu(x86_cpu_to_apicid, cpu) | uv_apicid_hibits; else return BAD_APICID; } @@ -255,7 +276,7 @@ uv_cpu_mask_to_apicid_and(const struct cpumask *cpumask, if (cpumask_test_cpu(cpu, cpu_online_mask)) break; } - return per_cpu(x86_cpu_to_apicid, cpu); + return per_cpu(x86_cpu_to_apicid, cpu) | uv_apicid_hibits; } static unsigned int x2apic_get_apic_id(unsigned long x) diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c index a318194002b5..ba9caa808a9c 100644 --- a/arch/x86/platform/uv/tlb_uv.c +++ b/arch/x86/platform/uv/tlb_uv.c @@ -1455,7 +1455,7 @@ static void __init uv_init_uvhub(int uvhub, int vector) * the below initialization can't be in firmware because the * messaging IRQ will be determined by the OS */ - apicid = uvhub_to_first_apicid(uvhub); + apicid = uvhub_to_first_apicid(uvhub) | uv_apicid_hibits; uv_write_global_mmr64(pnode, UVH_BAU_DATA_CONFIG, ((apicid << 32) | vector)); } diff --git a/arch/x86/platform/uv/uv_time.c b/arch/x86/platform/uv/uv_time.c index 56e421bc379b..9daf5d1af9f1 100644 --- a/arch/x86/platform/uv/uv_time.c +++ b/arch/x86/platform/uv/uv_time.c @@ -89,6 +89,7 @@ static void uv_rtc_send_IPI(int cpu) apicid = cpu_physical_id(cpu); pnode = uv_apicid_to_pnode(apicid); + apicid |= uv_apicid_hibits; val = (1UL << UVH_IPI_INT_SEND_SHFT) | (apicid << UVH_IPI_INT_APIC_ID_SHFT) | (X86_PLATFORM_IPI_VECTOR << UVH_IPI_INT_VECTOR_SHFT); @@ -107,6 +108,7 @@ static int uv_intr_pending(int pnode) static int uv_setup_intr(int cpu, u64 expires) { u64 val; + unsigned long apicid = cpu_physical_id(cpu) | uv_apicid_hibits; int pnode = uv_cpu_to_pnode(cpu); uv_write_global_mmr64(pnode, UVH_RTC1_INT_CONFIG, @@ -117,7 +119,7 @@ static int uv_setup_intr(int cpu, u64 expires) UVH_EVENT_OCCURRED0_RTC1_MASK); val = (X86_PLATFORM_IPI_VECTOR << UVH_RTC1_INT_CONFIG_VECTOR_SHFT) | - ((u64)cpu_physical_id(cpu) << UVH_RTC1_INT_CONFIG_APIC_ID_SHFT); + ((u64)apicid << UVH_RTC1_INT_CONFIG_APIC_ID_SHFT); /* Set configuration */ uv_write_global_mmr64(pnode, UVH_RTC1_INT_CONFIG, val); -- cgit v1.2.3 From b5482cfa1c95a188b3054fa33274806add91bbe5 Mon Sep 17 00:00:00 2001 From: Alex Shi Date: Tue, 16 Nov 2010 17:34:02 +0800 Subject: sched: Fix volanomark performance regression Commit fab4762 triggers excessive idle balancing, causing a ~30% loss in volanomark 
throughput. Remove the idle balancing throttle reset. Originally-by: Alex Shi Signed-off-by: Mike Galbraith Acked-by: Nikhil Rao Signed-off-by: Peter Zijlstra LKML-Reference: <1289928732.5169.211.camel@maggy.simson.net> Signed-off-by: Ingo Molnar --- kernel/sched_fair.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index 52ab113d8bb9..ba0556dc7c06 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -1758,10 +1758,6 @@ static void pull_task(struct rq *src_rq, struct task_struct *p, set_task_cpu(p, this_cpu); activate_task(this_rq, p, 0); check_preempt_curr(this_rq, p, 0); - - /* re-arm NEWIDLE balancing when moving tasks */ - src_rq->avg_idle = this_rq->avg_idle = 2*sysctl_sched_migration_cost; - this_rq->idle_stamp = 0; } /* -- cgit v1.2.3 From d5ad140bc1505a98c0f040937125bfcbb508078f Mon Sep 17 00:00:00 2001 From: Nikhil Rao Date: Wed, 17 Nov 2010 11:42:04 -0800 Subject: sched: Fix idle balancing An earlier commit reverted the idle balancing throttle reset to fix a 30% regression in volanomark throughput. We still need to reset idle_stamp when we pull a task in newidle balance. Reported-by: Alex Shi Signed-off-by: Nikhil Rao Signed-off-by: Peter Zijlstra LKML-Reference: <1290022924-3548-1-git-send-email-ncrao@google.com> Signed-off-by: Ingo Molnar --- kernel/sched_fair.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index ba0556dc7c06..00ebd7686676 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -3215,8 +3215,10 @@ static void idle_balance(int this_cpu, struct rq *this_rq) interval = msecs_to_jiffies(sd->balance_interval); if (time_after(next_balance, sd->last_balance + interval)) next_balance = sd->last_balance + interval; - if (pulled_task) + if (pulled_task) { + this_rq->idle_stamp = 0; break; + } } raw_spin_lock(&this_rq->lock); -- cgit v1.2.3 From 8882135bcd332f294df5455747ea43ba9e6f77ad Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 9 Nov 2010 19:01:43 +0100 Subject: perf: Fix owner-list vs exit Oleg noticed that a perf-fd keeping a reference on the creating task leads to a few funny side effects. There are two different aspects to this: - kernel-based perf-events: these should not take out a reference on the creating task or appear on the task's event list, since they're not bound to fds nor visible to userspace. - fork() and pthread_create(): these can lead to the creating task dying (and thus the task's event-list becoming useless) but keeping the list and ref alive until the event is closed. Combined they lead to malfunction of the ptrace hw_tracepoints. Cure this by not considering kernel-based perf_events for the owner-list and destroying the owner-list when the owner dies.
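The resulting handshake between task exit and fd release is worth spelling out; below is the release side, lightly simplified from the patch (error handling elided):

	rcu_read_lock();
	owner = ACCESS_ONCE(event->owner);
	/* pairs with the smp_wmb() in perf_event_exit_task(): seeing
	 * !owner means the list deletion is already complete */
	smp_read_barrier_depends();
	if (owner)
		get_task_struct(owner);	/* safe under RCU, since
					   delayed_put_task_struct()
					   drops the final reference */
	rcu_read_unlock();

	if (owner) {
		mutex_lock(&owner->perf_event_mutex);
		if (event->owner)	/* re-check: exit may have raced */
			list_del_init(&event->owner_entry);
		mutex_unlock(&owner->perf_event_mutex);
		put_task_struct(owner);
	}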
Reported-by: Oleg Nesterov Signed-off-by: Peter Zijlstra Acked-by: Oleg Nesterov LKML-Reference: <1289576883.2084.286.camel@laptop> Signed-off-by: Ingo Molnar --- kernel/perf_event.c | 63 +++++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 51 insertions(+), 12 deletions(-) diff --git a/kernel/perf_event.c b/kernel/perf_event.c index f818d9d2dc93..671f6c8c8a32 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -2235,11 +2235,6 @@ int perf_event_release_kernel(struct perf_event *event) raw_spin_unlock_irq(&ctx->lock); mutex_unlock(&ctx->mutex); - mutex_lock(&event->owner->perf_event_mutex); - list_del_init(&event->owner_entry); - mutex_unlock(&event->owner->perf_event_mutex); - put_task_struct(event->owner); - free_event(event); return 0; @@ -2252,9 +2247,43 @@ EXPORT_SYMBOL_GPL(perf_event_release_kernel); static int perf_release(struct inode *inode, struct file *file) { struct perf_event *event = file->private_data; + struct task_struct *owner; file->private_data = NULL; + rcu_read_lock(); + owner = ACCESS_ONCE(event->owner); + /* + * Matches the smp_wmb() in perf_event_exit_task(). If we observe + * !owner it means the list deletion is complete and we can indeed + * free this event, otherwise we need to serialize on + * owner->perf_event_mutex. + */ + smp_read_barrier_depends(); + if (owner) { + /* + * Since delayed_put_task_struct() also drops the last + * task reference we can safely take a new reference + * while holding the rcu_read_lock(). + */ + get_task_struct(owner); + } + rcu_read_unlock(); + + if (owner) { + mutex_lock(&owner->perf_event_mutex); + /* + * We have to re-check the event->owner field, if it is cleared + * we raced with perf_event_exit_task(), acquiring the mutex + * ensured they're done, and we can proceed with freeing the + * event. + */ + if (event->owner) + list_del_init(&event->owner_entry); + mutex_unlock(&owner->perf_event_mutex); + put_task_struct(owner); + } + return perf_event_release_kernel(event); } @@ -5678,7 +5707,7 @@ SYSCALL_DEFINE5(perf_event_open, mutex_unlock(&ctx->mutex); event->owner = current; - get_task_struct(current); + mutex_lock(¤t->perf_event_mutex); list_add_tail(&event->owner_entry, ¤t->perf_event_list); mutex_unlock(¤t->perf_event_mutex); @@ -5746,12 +5775,6 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu, ++ctx->generation; mutex_unlock(&ctx->mutex); - event->owner = current; - get_task_struct(current); - mutex_lock(¤t->perf_event_mutex); - list_add_tail(&event->owner_entry, ¤t->perf_event_list); - mutex_unlock(¤t->perf_event_mutex); - return event; err_free: @@ -5902,8 +5925,24 @@ again: */ void perf_event_exit_task(struct task_struct *child) { + struct perf_event *event, *tmp; int ctxn; + mutex_lock(&child->perf_event_mutex); + list_for_each_entry_safe(event, tmp, &child->perf_event_list, + owner_entry) { + list_del_init(&event->owner_entry); + + /* + * Ensure the list deletion is visible before we clear + * the owner, closes a race against perf_release() where + * we need to serialize on the owner->perf_event_mutex. 
+ */ + smp_wmb(); + event->owner = NULL; + } + mutex_unlock(&child->perf_event_mutex); + for_each_task_context_nr(ctxn) perf_event_exit_task_context(child, ctxn); } -- cgit v1.2.3 From 94e8ba728640dc01375a14e337f3b892bfacbeeb Mon Sep 17 00:00:00 2001 From: Sergio Aguirre Date: Tue, 16 Nov 2010 12:02:47 -0600 Subject: irq_work: Drop cmpxchg() result The compiler warned us about: kernel/irq_work.c: In function 'irq_work_run': kernel/irq_work.c:148: warning: value computed is not used Dropping the cmpxchg() result is indeed weird, but correct - so annotate away the warning. Signed-off-by: Sergio Aguirre Cc: Huang Ying Cc: Martin Schwidefsky Cc: Kyle McMartin Signed-off-by: Peter Zijlstra LKML-Reference: <1289930567-17828-1-git-send-email-saaguirre@ti.com> Signed-off-by: Ingo Molnar --- kernel/irq_work.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/kernel/irq_work.c b/kernel/irq_work.c index f16763ff8481..90f881904bb1 100644 --- a/kernel/irq_work.c +++ b/kernel/irq_work.c @@ -145,7 +145,9 @@ void irq_work_run(void) * Clear the BUSY bit and return to the free state if * no-one else claimed it meanwhile. */ - cmpxchg(&entry->next, next_flags(NULL, IRQ_WORK_BUSY), NULL); + (void)cmpxchg(&entry->next, + next_flags(NULL, IRQ_WORK_BUSY), + NULL); } } EXPORT_SYMBOL_GPL(irq_work_run); -- cgit v1.2.3 From de31ec8a31046111befd16a7083e3bdda2ff42cf Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 18 Nov 2010 19:16:55 +0900 Subject: x86/kprobes: Prevent kprobes to probe on save_args() Prevent kprobes to probe on save_args() since this function will be called from breakpoint exception handler. That will cause infinit loop on breakpoint handling. Signed-off-by: Masami Hiramatsu Cc: 2nddept-manager@sdl.hitachi.co.jp Cc: Ananth N Mavinakayanahalli LKML-Reference: <20101118101655.2779.2816.stgit@ltc236.sdl.hitachi.co.jp> Signed-off-by: Ingo Molnar --- arch/x86/kernel/entry_64.S | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index fe2690d71c0c..e3ba417e8697 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -295,6 +295,7 @@ ENDPROC(native_usergs_sysret64) .endm /* save partial stack frame */ + .pushsection .kprobes.text, "ax" ENTRY(save_args) XCPT_FRAME cld @@ -334,6 +335,7 @@ ENTRY(save_args) ret CFI_ENDPROC END(save_args) + .popsection ENTRY(save_rest) PARTIAL_FRAME 1 REST_SKIP+8 -- cgit v1.2.3 From 37db6c8f1d0c4b8f01dc049f3a893b725288660f Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Tue, 16 Nov 2010 08:25:08 +0000 Subject: x86-64: Fix and clean up AMD Fam10 MMCONF enabling Candidate memory ranges were not calculated properly (start addresses got needlessly rounded down, and end addresses didn't get rounded up at all), address comparison for secondary CPUs was done on only part of the address, and disabled status wasn't tracked properly. 
Signed-off-by: Jan Beulich Acked-by: Yinghai Lu Acked-by: Andreas Herrmann LKML-Reference: <4CE24DF40200007800022737@vpn.id2.novell.com> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/msr-index.h | 2 +- arch/x86/kernel/mmconf-fam10h_64.c | 64 ++++++++++++++++++-------------------- 2 files changed, 31 insertions(+), 35 deletions(-) diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 3ea3dc487047..6b89f5e86021 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -128,7 +128,7 @@ #define FAM10H_MMIO_CONF_ENABLE (1<<0) #define FAM10H_MMIO_CONF_BUSRANGE_MASK 0xf #define FAM10H_MMIO_CONF_BUSRANGE_SHIFT 2 -#define FAM10H_MMIO_CONF_BASE_MASK 0xfffffff +#define FAM10H_MMIO_CONF_BASE_MASK 0xfffffffULL #define FAM10H_MMIO_CONF_BASE_SHIFT 20 #define MSR_FAM10H_NODE_ID 0xc001100c diff --git a/arch/x86/kernel/mmconf-fam10h_64.c b/arch/x86/kernel/mmconf-fam10h_64.c index 6da143c2a6b8..ac861b8348e2 100644 --- a/arch/x86/kernel/mmconf-fam10h_64.c +++ b/arch/x86/kernel/mmconf-fam10h_64.c @@ -25,7 +25,6 @@ struct pci_hostbridge_probe { }; static u64 __cpuinitdata fam10h_pci_mmconf_base; -static int __cpuinitdata fam10h_pci_mmconf_base_status; static struct pci_hostbridge_probe pci_probes[] __cpuinitdata = { { 0, 0x18, PCI_VENDOR_ID_AMD, 0x1200 }, @@ -44,10 +43,12 @@ static int __cpuinit cmp_range(const void *x1, const void *x2) return start1 - start2; } -/*[47:0] */ -/* need to avoid (0xfd<<32) and (0xfe<<32), ht used space */ +#define MMCONF_UNIT (1ULL << FAM10H_MMIO_CONF_BASE_SHIFT) +#define MMCONF_MASK (~(MMCONF_UNIT - 1)) +#define MMCONF_SIZE (MMCONF_UNIT << 8) +/* need to avoid (0xfd<<32), (0xfe<<32), and (0xff<<32), ht used space */ #define FAM10H_PCI_MMCONF_BASE (0xfcULL<<32) -#define BASE_VALID(b) ((b != (0xfdULL << 32)) && (b != (0xfeULL << 32))) +#define BASE_VALID(b) ((b) + MMCONF_SIZE <= (0xfdULL<<32) || (b) >= (1ULL<<40)) static void __cpuinit get_fam10h_pci_mmconf_base(void) { int i; @@ -64,12 +65,11 @@ static void __cpuinit get_fam10h_pci_mmconf_base(void) struct range range[8]; /* only try to get setting from BSP */ - /* -1 or 1 */ - if (fam10h_pci_mmconf_base_status) + if (fam10h_pci_mmconf_base) return; if (!early_pci_allowed()) - goto fail; + return; found = 0; for (i = 0; i < ARRAY_SIZE(pci_probes); i++) { @@ -91,7 +91,7 @@ static void __cpuinit get_fam10h_pci_mmconf_base(void) } if (!found) - goto fail; + return; /* SYS_CFG */ address = MSR_K8_SYSCFG; @@ -99,16 +99,16 @@ static void __cpuinit get_fam10h_pci_mmconf_base(void) /* TOP_MEM2 is not enabled? 
*/ if (!(val & (1<<21))) { - tom2 = 0; + tom2 = 1ULL << 32; } else { /* TOP_MEM2 */ address = MSR_K8_TOP_MEM2; rdmsrl(address, val); - tom2 = val & (0xffffULL<<32); + tom2 = max(val & 0xffffff800000ULL, 1ULL << 32); } if (base <= tom2) - base = tom2 + (1ULL<<32); + base = (tom2 + 2 * MMCONF_UNIT - 1) & MMCONF_MASK; /* * need to check if the range is in the high mmio range that is @@ -123,11 +123,11 @@ static void __cpuinit get_fam10h_pci_mmconf_base(void) if (!(reg & 3)) continue; - start = (((u64)reg) << 8) & (0xffULL << 32); /* 39:16 on 31:8*/ + start = (u64)(reg & 0xffffff00) << 8; /* 39:16 on 31:8*/ reg = read_pci_config(bus, slot, 1, 0x84 + (i << 3)); - end = (((u64)reg) << 8) & (0xffULL << 32); /* 39:16 on 31:8*/ + end = ((u64)(reg & 0xffffff00) << 8) | 0xffff; /* 39:16 on 31:8*/ - if (!end) + if (end < tom2) continue; range[hi_mmio_num].start = start; @@ -143,32 +143,27 @@ static void __cpuinit get_fam10h_pci_mmconf_base(void) if (range[hi_mmio_num - 1].end < base) goto out; - if (range[0].start > base) + if (range[0].start > base + MMCONF_SIZE) goto out; /* need to find one window */ - base = range[0].start - (1ULL << 32); + base = (range[0].start & MMCONF_MASK) - MMCONF_UNIT; if ((base > tom2) && BASE_VALID(base)) goto out; - base = range[hi_mmio_num - 1].end + (1ULL << 32); - if ((base > tom2) && BASE_VALID(base)) + base = (range[hi_mmio_num - 1].end + MMCONF_UNIT) & MMCONF_MASK; + if (BASE_VALID(base)) goto out; /* need to find window between ranges */ - if (hi_mmio_num > 1) - for (i = 0; i < hi_mmio_num - 1; i++) { - if (range[i + 1].start > (range[i].end + (1ULL << 32))) { - base = range[i].end + (1ULL << 32); - if ((base > tom2) && BASE_VALID(base)) - goto out; - } + for (i = 1; i < hi_mmio_num; i++) { + base = (range[i - 1].end + MMCONF_UNIT) & MMCONF_MASK; + val = range[i].start & MMCONF_MASK; + if (val >= base + MMCONF_SIZE && BASE_VALID(base)) + goto out; } - -fail: - fam10h_pci_mmconf_base_status = -1; return; + out: fam10h_pci_mmconf_base = base; - fam10h_pci_mmconf_base_status = 1; } void __cpuinit fam10h_check_enable_mmcfg(void) @@ -190,11 +185,10 @@ void __cpuinit fam10h_check_enable_mmcfg(void) /* only trust the one handle 256 buses, if acpi=off */ if (!acpi_pci_disabled || busnbits >= 8) { - u64 base; - base = val & (0xffffULL << 32); - if (fam10h_pci_mmconf_base_status <= 0) { + u64 base = val & MMCONF_MASK; + + if (!fam10h_pci_mmconf_base) { fam10h_pci_mmconf_base = base; - fam10h_pci_mmconf_base_status = 1; return; } else if (fam10h_pci_mmconf_base == base) return; @@ -206,8 +200,10 @@ void __cpuinit fam10h_check_enable_mmcfg(void) * with 256 buses */ get_fam10h_pci_mmconf_base(); - if (fam10h_pci_mmconf_base_status <= 0) + if (!fam10h_pci_mmconf_base) { + pci_probe &= ~PCI_CHECK_ENABLE_AMD_MMCONF; return; + } printk(KERN_INFO "Enable MMCONFIG on AMD Family 10h\n"); val &= ~((FAM10H_MMIO_CONF_BASE_MASK< Date: Thu, 18 Nov 2010 11:24:24 -0500 Subject: GFS2: Userland expects quota limit/warn/usage in 512b blocks Userland programs using the quotactl() syscall assume limit/warn/usage block counts in 512b basic blocks which were instead being read/written in fs blocksize in gfs2. With this patch, gfs2 correctly interacts with the syscall using 512b blocks. 
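A worked example of the conversion, assuming a 4 KiB filesystem block (so sd_fsb2bb_shift = sb_bsize_shift - GFS2_BASIC_BLOCK_SHIFT = 12 - 9 = 3):

	u64 fs_blocks = 100;				/* limit as stored by gfs2 */
	u64 bb = fs_blocks << sdp->sd_fsb2bb_shift;	/* 800 basic (512b) blocks
							   reported to quotactl() */
	u64 back = bb >> sdp->sd_fsb2bb_shift;		/* 100 fs blocks stored
							   again on a set */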
Signed-off-by: Abhi Das Reviewed-by: Christoph Hellwig Signed-off-by: Steven Whitehouse --- fs/gfs2/quota.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index 58a9b9998b42..f606baf9ba72 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -631,6 +631,7 @@ static int gfs2_adjust_quota(struct gfs2_inode *ip, loff_t loc, struct fs_disk_quota *fdq) { struct inode *inode = &ip->i_inode; + struct gfs2_sbd *sdp = GFS2_SB(inode); struct address_space *mapping = inode->i_mapping; unsigned long index = loc >> PAGE_CACHE_SHIFT; unsigned offset = loc & (PAGE_CACHE_SIZE - 1); @@ -658,11 +659,11 @@ static int gfs2_adjust_quota(struct gfs2_inode *ip, loff_t loc, qd->qd_qb.qb_value = qp->qu_value; if (fdq) { if (fdq->d_fieldmask & FS_DQ_BSOFT) { - qp->qu_warn = cpu_to_be64(fdq->d_blk_softlimit); + qp->qu_warn = cpu_to_be64(fdq->d_blk_softlimit >> sdp->sd_fsb2bb_shift); qd->qd_qb.qb_warn = qp->qu_warn; } if (fdq->d_fieldmask & FS_DQ_BHARD) { - qp->qu_limit = cpu_to_be64(fdq->d_blk_hardlimit); + qp->qu_limit = cpu_to_be64(fdq->d_blk_hardlimit >> sdp->sd_fsb2bb_shift); qd->qd_qb.qb_limit = qp->qu_limit; } } @@ -1497,9 +1498,9 @@ static int gfs2_get_dqblk(struct super_block *sb, int type, qid_t id, fdq->d_version = FS_DQUOT_VERSION; fdq->d_flags = (type == QUOTA_USER) ? FS_USER_QUOTA : FS_GROUP_QUOTA; fdq->d_id = id; - fdq->d_blk_hardlimit = be64_to_cpu(qlvb->qb_limit); - fdq->d_blk_softlimit = be64_to_cpu(qlvb->qb_warn); - fdq->d_bcount = be64_to_cpu(qlvb->qb_value); + fdq->d_blk_hardlimit = be64_to_cpu(qlvb->qb_limit) << sdp->sd_fsb2bb_shift; + fdq->d_blk_softlimit = be64_to_cpu(qlvb->qb_warn) << sdp->sd_fsb2bb_shift; + fdq->d_bcount = be64_to_cpu(qlvb->qb_value) << sdp->sd_fsb2bb_shift; gfs2_glock_dq_uninit(&q_gh); out: @@ -1566,10 +1567,10 @@ static int gfs2_set_dqblk(struct super_block *sb, int type, qid_t id, /* If nothing has changed, this is a no-op */ if ((fdq->d_fieldmask & FS_DQ_BSOFT) && - (fdq->d_blk_softlimit == be64_to_cpu(qd->qd_qb.qb_warn))) + ((fdq->d_blk_softlimit >> sdp->sd_fsb2bb_shift) == be64_to_cpu(qd->qd_qb.qb_warn))) fdq->d_fieldmask ^= FS_DQ_BSOFT; if ((fdq->d_fieldmask & FS_DQ_BHARD) && - (fdq->d_blk_hardlimit == be64_to_cpu(qd->qd_qb.qb_limit))) + ((fdq->d_blk_hardlimit >> sdp->sd_fsb2bb_shift) == be64_to_cpu(qd->qd_qb.qb_limit))) fdq->d_fieldmask ^= FS_DQ_BHARD; if (fdq->d_fieldmask == 0) goto out_i; -- cgit v1.2.3 From 1b1d76e2df2a0aa965d6a227db7061d3fff029b1 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 18 Nov 2010 06:58:04 +0300 Subject: UBI: release locks in check_corruption Commit 45aafd32996e27 "UBI: tighten the corrupted PEB criteria" introduced some return paths that didn't release the ubi->buf_mutex Signed-off-by: Dan Carpenter Signed-off-by: Artem Bityutskiy --- drivers/mtd/ubi/scan.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/drivers/mtd/ubi/scan.c b/drivers/mtd/ubi/scan.c index 3c631863bf40..204345be8e62 100644 --- a/drivers/mtd/ubi/scan.c +++ b/drivers/mtd/ubi/scan.c @@ -787,16 +787,15 @@ static int check_corruption(struct ubi_device *ubi, struct ubi_vid_hdr *vid_hdr, * erased, so it became unstable and corrupted, and should be * erased. 
*/ - return 0; + err = 0; + goto out_unlock; } if (err) - return err; + goto out_unlock; - if (ubi_check_pattern(ubi->peb_buf1, 0xFF, ubi->leb_size)) { - mutex_unlock(&ubi->buf_mutex); - return 0; - } + if (ubi_check_pattern(ubi->peb_buf1, 0xFF, ubi->leb_size)) + goto out_unlock; ubi_err("PEB %d contains corrupted VID header, and the data does not " "contain all 0xFF, this may be a non-UBI PEB or a severe VID " @@ -806,8 +805,11 @@ static int check_corruption(struct ubi_device *ubi, struct ubi_vid_hdr *vid_hdr, pnum, ubi->leb_start, ubi->leb_size); ubi_dbg_print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_OFFSET, 32, 1, ubi->peb_buf1, ubi->leb_size, 1); + err = 1; + +out_unlock: mutex_unlock(&ubi->buf_mutex); - return 1; + return err; } /** -- cgit v1.2.3 From 4d0812c37f2f6cf6fc7ca086b5a5e572cbbe7f6d Mon Sep 17 00:00:00 2001 From: Ohad Ben-Cohen Date: Sun, 14 Nov 2010 12:40:33 +0200 Subject: mmc: sdio: fix nasty oops in mmc_sdio_detect Power off the card in mmc_sdio_detect __before__ a potential error handler, which completely removes the card, executes, and only if the card was successfully powered on beforehand. While we're at it, use the _sync variant of the runtime PM put API, in order to ensure that the card is left powered off in case an error occurred, and the card is going to be removed. Reproduced and tested on the OLPC XO-1.5. Reported-by: Daniel Drake Signed-off-by: Ohad Ben-Cohen Signed-off-by: Chris Ball --- drivers/mmc/core/sdio.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/drivers/mmc/core/sdio.c b/drivers/mmc/core/sdio.c index c3ad1058cd31..42a949b723b8 100644 --- a/drivers/mmc/core/sdio.c +++ b/drivers/mmc/core/sdio.c @@ -560,6 +560,19 @@ static void mmc_sdio_detect(struct mmc_host *host) mmc_release_host(host); + /* + * Tell PM core it's OK to power off the card now. + * + * The _sync variant is used in order to ensure that the card + * is left powered off in case an error occurred, and the card + * is going to be removed. + * + * Since there is no specific reason to believe a new user + * is about to show up at this point, the _sync variant is + * desirable anyway. + */ + pm_runtime_put_sync(&host->card->dev); + out: if (err) { mmc_sdio_remove(host); @@ -568,9 +581,6 @@ out: mmc_detach_bus(host); mmc_release_host(host); } - - /* Tell PM core that we're done */ - pm_runtime_put(&host->card->dev); } /* -- cgit v1.2.3 From ed919b0125b26dcc052e44836f66e7e1f5c49c7e Mon Sep 17 00:00:00 2001 From: Ohad Ben-Cohen Date: Fri, 19 Nov 2010 09:29:09 +0200 Subject: mmc: sdio: fix runtime PM anomalies by introducing MMC_CAP_POWER_OFF_CARD Some board/card/host configurations are not capable of powering off the card after boot. To support such configurations, and to allow smoother transition to runtime PM behavior, MMC_CAP_POWER_OFF_CARD is added, so hosts need to explicitly indicate whether it's OK to power off their cards after boot. SDIO core will enable runtime PM for a card only if that cap is set. As a result, the card will be powered down after boot, and will only be powered up again when a driver is loaded (and then it's up to the driver to decide whether power will be kept or not). This will prevent sdio_bus_probe() failures with setups that do not support powering off the card. 
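From a host driver's point of view, opting in is a one-line capability flag; a hypothetical sketch (the driver and struct names are illustrative):

	static int example_sdio_host_probe(struct platform_device *pdev)
	{
		struct mmc_host *mmc;

		mmc = mmc_alloc_host(sizeof(struct example_host), &pdev->dev);
		if (!mmc)
			return -ENOMEM;

		/* this board really can cut VDD to the card after boot */
		mmc->caps |= MMC_CAP_POWER_OFF_CARD;

		return mmc_add_host(mmc);
	}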
Reported-and-tested-by: Daniel Drake Reported-and-tested-by: Arnd Hannemann Signed-off-by: Ohad Ben-Cohen Signed-off-by: Chris Ball --- drivers/mmc/core/sdio.c | 37 +++++++++++++++++++++++-------------- drivers/mmc/core/sdio_bus.c | 33 ++++++++++++++++++++++----------- include/linux/mmc/host.h | 1 + 3 files changed, 46 insertions(+), 25 deletions(-) diff --git a/drivers/mmc/core/sdio.c b/drivers/mmc/core/sdio.c index 42a949b723b8..efef5f94ac42 100644 --- a/drivers/mmc/core/sdio.c +++ b/drivers/mmc/core/sdio.c @@ -547,9 +547,11 @@ static void mmc_sdio_detect(struct mmc_host *host) BUG_ON(!host->card); /* Make sure card is powered before detecting it */ - err = pm_runtime_get_sync(&host->card->dev); - if (err < 0) - goto out; + if (host->caps & MMC_CAP_POWER_OFF_CARD) { + err = pm_runtime_get_sync(&host->card->dev); + if (err < 0) + goto out; + } mmc_claim_host(host); @@ -571,7 +573,8 @@ static void mmc_sdio_detect(struct mmc_host *host) * is about to show up at this point, the _sync variant is * desirable anyway. */ - pm_runtime_put_sync(&host->card->dev); + if (host->caps & MMC_CAP_POWER_OFF_CARD) + pm_runtime_put_sync(&host->card->dev); out: if (err) { @@ -728,16 +731,21 @@ int mmc_attach_sdio(struct mmc_host *host, u32 ocr) card = host->card; /* - * Let runtime PM core know our card is active + * Enable runtime PM only if supported by host+card+board */ - err = pm_runtime_set_active(&card->dev); - if (err) - goto remove; + if (host->caps & MMC_CAP_POWER_OFF_CARD) { + /* + * Let runtime PM core know our card is active + */ + err = pm_runtime_set_active(&card->dev); + if (err) + goto remove; - /* - * Enable runtime PM for this card - */ - pm_runtime_enable(&card->dev); + /* + * Enable runtime PM for this card + */ + pm_runtime_enable(&card->dev); + } /* * The number of functions on the card is encoded inside @@ -755,9 +763,10 @@ int mmc_attach_sdio(struct mmc_host *host, u32 ocr) goto remove; /* - * Enable Runtime PM for this func + * Enable Runtime PM for this func (if supported) */ - pm_runtime_enable(&card->sdio_func[i]->dev); + if (host->caps & MMC_CAP_POWER_OFF_CARD) + pm_runtime_enable(&card->sdio_func[i]->dev); } mmc_release_host(host); diff --git a/drivers/mmc/core/sdio_bus.c b/drivers/mmc/core/sdio_bus.c index 2716c7ab6bbf..203da443e339 100644 --- a/drivers/mmc/core/sdio_bus.c +++ b/drivers/mmc/core/sdio_bus.c @@ -17,6 +17,7 @@ #include #include +#include #include #include "sdio_cis.h" @@ -132,9 +133,11 @@ static int sdio_bus_probe(struct device *dev) * it should call pm_runtime_put_noidle() in its probe routine and * pm_runtime_get_noresume() in its remove routine. */ - ret = pm_runtime_get_sync(dev); - if (ret < 0) - goto out; + if (func->card->host->caps & MMC_CAP_POWER_OFF_CARD) { + ret = pm_runtime_get_sync(dev); + if (ret < 0) + goto out; + } /* Set the default block size so the driver is sure it's something * sensible. 
*/ @@ -151,7 +154,8 @@ static int sdio_bus_probe(struct device *dev) return 0; disable_runtimepm: - pm_runtime_put_noidle(dev); + if (func->card->host->caps & MMC_CAP_POWER_OFF_CARD) + pm_runtime_put_noidle(dev); out: return ret; } @@ -160,12 +164,14 @@ static int sdio_bus_remove(struct device *dev) { struct sdio_driver *drv = to_sdio_driver(dev->driver); struct sdio_func *func = dev_to_sdio_func(dev); - int ret; + int ret = 0; /* Make sure card is powered before invoking ->remove() */ - ret = pm_runtime_get_sync(dev); - if (ret < 0) - goto out; + if (func->card->host->caps & MMC_CAP_POWER_OFF_CARD) { + ret = pm_runtime_get_sync(dev); + if (ret < 0) + goto out; + } drv->remove(func); @@ -178,10 +184,12 @@ static int sdio_bus_remove(struct device *dev) } /* First, undo the increment made directly above */ - pm_runtime_put_noidle(dev); + if (func->card->host->caps & MMC_CAP_POWER_OFF_CARD) + pm_runtime_put_noidle(dev); /* Then undo the runtime PM settings in sdio_bus_probe() */ - pm_runtime_put_noidle(dev); + if (func->card->host->caps & MMC_CAP_POWER_OFF_CARD) + pm_runtime_put_noidle(dev); out: return ret; @@ -191,6 +199,8 @@ out: static int sdio_bus_pm_prepare(struct device *dev) { + struct sdio_func *func = dev_to_sdio_func(dev); + /* * Resume an SDIO device which was suspended at run time at this * point, in order to allow standard SDIO suspend/resume paths @@ -212,7 +222,8 @@ static int sdio_bus_pm_prepare(struct device *dev) * since there is little point in failing system suspend if a * device can't be resumed. */ - pm_runtime_resume(dev); + if (func->card->host->caps & MMC_CAP_POWER_OFF_CARD) + pm_runtime_resume(dev); return 0; } diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index 6d87f68ce4b6..30f6fad99a58 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -168,6 +168,7 @@ struct mmc_host { /* DDR mode at 1.8V */ #define MMC_CAP_1_2V_DDR (1 << 12) /* can support */ /* DDR mode at 1.2V */ +#define MMC_CAP_POWER_OFF_CARD (1 << 13) /* Can power off after boot */ mmc_pm_flag_t pm_caps; /* supported pm features */ -- cgit v1.2.3 From 784b4e29a26617589edd290dd2919735e190c06e Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Sun, 21 Nov 2010 22:20:49 -0500 Subject: Btrfs: add migrate page for metadata inode Migrate page will directly call the btrfs btree writepage function, which isn't actually allowed. Our writepage assumes that you have locked the extent_buffer and flagged the block as written. Without doing these steps, we can corrupt metadata blocks. A later commit will remove the btree writepage function since it is really only safely used internally by btrfs. We use writepages for everything else. 
Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 27 +++++++++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index b40dfe48017b..a67b98d58c2a 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -28,6 +28,7 @@ #include #include #include +#include #include "compat.h" #include "ctree.h" #include "disk-io.h" @@ -355,6 +356,8 @@ static int csum_dirty_buffer(struct btrfs_root *root, struct page *page) ret = btree_read_extent_buffer_pages(root, eb, start + PAGE_CACHE_SIZE, btrfs_header_generation(eb)); BUG_ON(ret); + WARN_ON(!btrfs_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN)); + found_start = btrfs_header_bytenr(eb); if (found_start != start) { WARN_ON(1); @@ -693,6 +696,26 @@ static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, __btree_submit_bio_done); } +static int btree_migratepage(struct address_space *mapping, + struct page *newpage, struct page *page) +{ + /* + * we can't safely write a btree page from here, + * we haven't done the locking hook + */ + if (PageDirty(page)) + return -EAGAIN; + /* + * Buffers may be managed in a filesystem specific way. + * We must have no buffers or drop them. + */ + if (page_has_private(page) && + !try_to_release_page(page, GFP_KERNEL)) + return -EAGAIN; + + return migrate_page(mapping, newpage, page); +} + static int btree_writepage(struct page *page, struct writeback_control *wbc) { struct extent_io_tree *tree; @@ -707,8 +730,7 @@ static int btree_writepage(struct page *page, struct writeback_control *wbc) } redirty_page_for_writepage(wbc, page); - eb = btrfs_find_tree_block(root, page_offset(page), - PAGE_CACHE_SIZE); + eb = btrfs_find_tree_block(root, page_offset(page), PAGE_CACHE_SIZE); WARN_ON(!eb); was_dirty = test_and_set_bit(EXTENT_BUFFER_DIRTY, &eb->bflags); @@ -799,6 +821,7 @@ static const struct address_space_operations btree_aops = { .releasepage = btree_releasepage, .invalidatepage = btree_invalidatepage, .sync_page = block_sync_page, + .migratepage = btree_migratepage, }; int readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize, -- cgit v1.2.3 From 0c56fa9662927354255f2f64617d1de61fc03db9 Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Mon, 22 Nov 2010 03:01:39 +0000 Subject: btrfs: fix double free of dip and dip->csums bio_endio() already frees dip and dip->csums, so freeing them again in the submit error path results in a double free. Fix it.
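The shape of the bug, reduced to its essentials (labels as in the diff below): once bio_endio() can run, the end_io handler owns dip and dip->csums, so the submit path must not free them itself:

	out_err:			/* removed by the patch */
		kfree(dip->csums);	/* first free ... */
		kfree(dip);
		/* falls through */
	free_ordered:
		...
		bio_endio(bio, ...);	/* end_io handler frees dip and
					   dip->csums again: double free */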
Signed-off-by: Miao Xie Signed-off-by: Chris Mason --- fs/btrfs/inode.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 5132c9af888a..8c027aa0020a 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -5731,7 +5731,7 @@ static void btrfs_submit_direct(int rw, struct bio *bio, struct inode *inode, ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0); if (ret) - goto out_err; + goto free_ordered; if (write && !skip_sum) { ret = btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info, @@ -5740,7 +5740,7 @@ static void btrfs_submit_direct(int rw, struct bio *bio, struct inode *inode, __btrfs_submit_bio_start_direct_io, __btrfs_submit_bio_done); if (ret) - goto out_err; + goto free_ordered; return; } else if (!skip_sum) btrfs_lookup_bio_sums_dio(root, inode, bio, @@ -5748,11 +5748,8 @@ static void btrfs_submit_direct(int rw, struct bio *bio, struct inode *inode, ret = btrfs_map_bio(root, rw, bio, 0, 1); if (ret) - goto out_err; + goto free_ordered; return; -out_err: - kfree(dip->csums); - kfree(dip); free_ordered: /* * If this is a write, we need to clean up the reserved space and kill -- cgit v1.2.3 From 88f794ede7fadd4b63135b94d1561c1f2d5eb5f5 Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Mon, 22 Nov 2010 03:02:55 +0000 Subject: btrfs: cleanup duplicate bio allocating functions extent_bio_alloc() and compressed_bio_alloc() are similar, cleanup similar source code. Signed-off-by: Miao Xie Signed-off-by: Chris Mason --- fs/btrfs/compression.c | 15 +-------------- fs/btrfs/extent_io.c | 8 ++++---- fs/btrfs/extent_io.h | 3 +++ 3 files changed, 8 insertions(+), 18 deletions(-) diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index 7845d1f7d1d9..b50bc4bd5c56 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -91,23 +91,10 @@ static inline int compressed_bio_size(struct btrfs_root *root, static struct bio *compressed_bio_alloc(struct block_device *bdev, u64 first_byte, gfp_t gfp_flags) { - struct bio *bio; int nr_vecs; nr_vecs = bio_get_nr_vecs(bdev); - bio = bio_alloc(gfp_flags, nr_vecs); - - if (bio == NULL && (current->flags & PF_MEMALLOC)) { - while (!bio && (nr_vecs /= 2)) - bio = bio_alloc(gfp_flags, nr_vecs); - } - - if (bio) { - bio->bi_size = 0; - bio->bi_bdev = bdev; - bio->bi_sector = first_byte >> 9; - } - return bio; + return btrfs_bio_alloc(bdev, first_byte >> 9, nr_vecs, gfp_flags); } static int check_compressed_csum(struct inode *inode, diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 3b7eaee0f912..f60aa3c35c23 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -1828,9 +1828,9 @@ static void end_bio_extent_preparewrite(struct bio *bio, int err) bio_put(bio); } -static struct bio * -extent_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs, - gfp_t gfp_flags) +struct bio * +btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs, + gfp_t gfp_flags) { struct bio *bio; @@ -1919,7 +1919,7 @@ static int submit_extent_page(int rw, struct extent_io_tree *tree, else nr = bio_get_nr_vecs(bdev); - bio = extent_bio_alloc(bdev, sector, nr, GFP_NOFS | __GFP_HIGH); + bio = btrfs_bio_alloc(bdev, sector, nr, GFP_NOFS | __GFP_HIGH); bio_add_page(bio, page, page_size, offset); bio->bi_end_io = end_io_func; diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 1c6d4f342ef7..4183c8178f01 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -310,4 +310,7 @@ int extent_clear_unlock_delalloc(struct inode *inode, struct extent_io_tree 
*tree, u64 start, u64 end, struct page *locked_page, unsigned long op); +struct bio * +btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs, + gfp_t gfp_flags); #endif -- cgit v1.2.3 From e65e1535542931e51189832264cd282e5899e4b9 Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Mon, 22 Nov 2010 03:04:43 +0000 Subject: btrfs: fix panic caused by direct IO btrfs paniced when we write >64KB data by direct IO at one time. Reproduce steps: # mkfs.btrfs /dev/sda5 /dev/sda6 # mount /dev/sda5 /mnt # dd if=/dev/zero of=/mnt/tmpfile bs=100K count=1 oflag=direct Then btrfs paniced: mapping failed logical 1103155200 bio len 69632 len 12288 ------------[ cut here ]------------ kernel BUG at fs/btrfs/volumes.c:3010! [SNIP] Pid: 1992, comm: btrfs-worker-0 Not tainted 2.6.37-rc1 #1 D2399/PRIMERGY RIP: 0010:[] [] btrfs_map_bio+0x202/0x210 [btrfs] [SNIP] Call Trace: [] __btrfs_submit_bio_done+0x1b/0x20 [btrfs] [] run_one_async_done+0x9f/0xb0 [btrfs] [] run_ordered_completions+0x80/0xc0 [btrfs] [] worker_loop+0x154/0x5f0 [btrfs] [] ? worker_loop+0x0/0x5f0 [btrfs] [] ? worker_loop+0x0/0x5f0 [btrfs] [] kthread+0x96/0xa0 [] kernel_thread_helper+0x4/0x10 [] ? kthread+0x0/0xa0 [] ? kernel_thread_helper+0x0/0x10 We fix this problem by splitting bios when we submit bios. Reported-by: Tsutomu Itoh Signed-off-by: Miao Xie Tested-by: Tsutomu Itoh Signed-off-by: Chris Mason --- fs/btrfs/inode.c | 205 +++++++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 184 insertions(+), 21 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 8c027aa0020a..a47e4faa8c46 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -5535,13 +5535,21 @@ struct btrfs_dio_private { u64 bytes; u32 *csums; void *private; + + /* number of bios pending for this dio */ + atomic_t pending_bios; + + /* IO errors */ + int errors; + + struct bio *orig_bio; }; static void btrfs_endio_direct_read(struct bio *bio, int err) { + struct btrfs_dio_private *dip = bio->bi_private; struct bio_vec *bvec_end = bio->bi_io_vec + bio->bi_vcnt - 1; struct bio_vec *bvec = bio->bi_io_vec; - struct btrfs_dio_private *dip = bio->bi_private; struct inode *inode = dip->inode; struct btrfs_root *root = BTRFS_I(inode)->root; u64 start; @@ -5684,6 +5692,176 @@ static int __btrfs_submit_bio_start_direct_io(struct inode *inode, int rw, return 0; } +static void btrfs_end_dio_bio(struct bio *bio, int err) +{ + struct btrfs_dio_private *dip = bio->bi_private; + + if (err) { + printk(KERN_ERR "btrfs direct IO failed ino %lu rw %lu " + "disk_bytenr %lu len %u err no %d\n", + dip->inode->i_ino, bio->bi_rw, bio->bi_sector, + bio->bi_size, err); + dip->errors = 1; + + /* + * before atomic variable goto zero, we must make sure + * dip->errors is perceived to be set. 
+ */ + smp_mb__before_atomic_dec(); + } + + /* if there are more bios still pending for this dio, just exit */ + if (!atomic_dec_and_test(&dip->pending_bios)) + goto out; + + if (dip->errors) + bio_io_error(dip->orig_bio); + else { + set_bit(BIO_UPTODATE, &dip->orig_bio->bi_flags); + bio_endio(dip->orig_bio, 0); + } +out: + bio_put(bio); +} + +static struct bio *btrfs_dio_bio_alloc(struct block_device *bdev, + u64 first_sector, gfp_t gfp_flags) +{ + int nr_vecs = bio_get_nr_vecs(bdev); + return btrfs_bio_alloc(bdev, first_sector, nr_vecs, gfp_flags); +} + +static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode, + int rw, u64 file_offset, int skip_sum, + u32 *csums) +{ + int write = rw & REQ_WRITE; + struct btrfs_root *root = BTRFS_I(inode)->root; + int ret; + + bio_get(bio); + ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0); + if (ret) + goto err; + + if (write && !skip_sum) { + ret = btrfs_wq_submit_bio(root->fs_info, + inode, rw, bio, 0, 0, + file_offset, + __btrfs_submit_bio_start_direct_io, + __btrfs_submit_bio_done); + goto err; + } else if (!skip_sum) + btrfs_lookup_bio_sums_dio(root, inode, bio, + file_offset, csums); + + ret = btrfs_map_bio(root, rw, bio, 0, 1); +err: + bio_put(bio); + return ret; +} + +static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip, + int skip_sum) +{ + struct inode *inode = dip->inode; + struct btrfs_root *root = BTRFS_I(inode)->root; + struct btrfs_mapping_tree *map_tree = &root->fs_info->mapping_tree; + struct bio *bio; + struct bio *orig_bio = dip->orig_bio; + struct bio_vec *bvec = orig_bio->bi_io_vec; + u64 start_sector = orig_bio->bi_sector; + u64 file_offset = dip->logical_offset; + u64 submit_len = 0; + u64 map_length; + int nr_pages = 0; + u32 *csums = dip->csums; + int ret = 0; + + bio = btrfs_dio_bio_alloc(orig_bio->bi_bdev, start_sector, GFP_NOFS); + if (!bio) + return -ENOMEM; + bio->bi_private = dip; + bio->bi_end_io = btrfs_end_dio_bio; + atomic_inc(&dip->pending_bios); + + map_length = orig_bio->bi_size; + ret = btrfs_map_block(map_tree, READ, start_sector << 9, + &map_length, NULL, 0); + if (ret) { + bio_put(bio); + return -EIO; + } + + while (bvec <= (orig_bio->bi_io_vec + orig_bio->bi_vcnt - 1)) { + if (unlikely(map_length < submit_len + bvec->bv_len || + bio_add_page(bio, bvec->bv_page, bvec->bv_len, + bvec->bv_offset) < bvec->bv_len)) { + /* + * inc the count before we submit the bio so + * we know the end IO handler won't happen before + * we inc the count. 
Otherwise, the dip might get freed + * before we're done setting it up + */ + atomic_inc(&dip->pending_bios); + ret = __btrfs_submit_dio_bio(bio, inode, rw, + file_offset, skip_sum, + csums); + if (ret) { + bio_put(bio); + atomic_dec(&dip->pending_bios); + goto out_err; + } + + if (!skip_sum) + csums = csums + nr_pages; + start_sector += submit_len >> 9; + file_offset += submit_len; + + submit_len = 0; + nr_pages = 0; + + bio = btrfs_dio_bio_alloc(orig_bio->bi_bdev, + start_sector, GFP_NOFS); + if (!bio) + goto out_err; + bio->bi_private = dip; + bio->bi_end_io = btrfs_end_dio_bio; + + map_length = orig_bio->bi_size; + ret = btrfs_map_block(map_tree, READ, start_sector << 9, + &map_length, NULL, 0); + if (ret) { + bio_put(bio); + goto out_err; + } + } else { + submit_len += bvec->bv_len; + nr_pages ++; + bvec++; + } + } + + ret = __btrfs_submit_dio_bio(bio, inode, rw, file_offset, skip_sum, + csums); + if (!ret) + return 0; + + bio_put(bio); +out_err: + dip->errors = 1; + /* + * before atomic variable goto zero, we must + * make sure dip->errors is perceived to be set. + */ + smp_mb__before_atomic_dec(); + if (atomic_dec_and_test(&dip->pending_bios)) + bio_io_error(dip->orig_bio); + + /* bio_end_io() will handle error, so we needn't return it */ + return 0; +} + static void btrfs_submit_direct(int rw, struct bio *bio, struct inode *inode, loff_t file_offset) { @@ -5723,33 +5901,18 @@ static void btrfs_submit_direct(int rw, struct bio *bio, struct inode *inode, dip->disk_bytenr = (u64)bio->bi_sector << 9; bio->bi_private = dip; + dip->errors = 0; + dip->orig_bio = bio; + atomic_set(&dip->pending_bios, 0); if (write) bio->bi_end_io = btrfs_endio_direct_write; else bio->bi_end_io = btrfs_endio_direct_read; - ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0); - if (ret) - goto free_ordered; - - if (write && !skip_sum) { - ret = btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info, - inode, rw, bio, 0, 0, - dip->logical_offset, - __btrfs_submit_bio_start_direct_io, - __btrfs_submit_bio_done); - if (ret) - goto free_ordered; + ret = btrfs_submit_direct_hook(rw, dip, skip_sum); + if (!ret) return; - } else if (!skip_sum) - btrfs_lookup_bio_sums_dio(root, inode, bio, - dip->logical_offset, dip->csums); - - ret = btrfs_map_bio(root, rw, bio, 0, 1); - if (ret) - goto free_ordered; - return; free_ordered: /* * If this is a write, we need to clean up the reserved space and kill -- cgit v1.2.3 From 6f33434850ed87dc5e56b60ebbad3d3cf405f296 Mon Sep 17 00:00:00 2001 From: Arne Jansen Date: Fri, 12 Nov 2010 23:17:56 +0000 Subject: btrfs: Fix early enospc because 'unused' calculated with wrong sign. 'unused' calculated with wrong sign in reserve_metadata_bytes(). This might have lead to unwanted over-reservations. Signed-off-by: Arne Jansen Reviewed-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index a541bc87f04c..ddaf6340fe7f 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -3413,7 +3413,7 @@ again: * our reservation. 
*/ if (unused <= space_info->total_bytes) { - unused -= space_info->total_bytes; + unused = space_info->total_bytes - unused; if (unused >= num_bytes) { if (!reserved) space_info->bytes_reserved += orig_bytes; -- cgit v1.2.3 From 0de90876c6cb774d4a424dafc1fc9ec50071b81b Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 19 Nov 2010 13:40:41 +0000 Subject: Btrfs: handle the space_cache option properly When I added the clear_cache option I screwed up and took the break out of the space_cache case statement, so whenever you mount with space_cache you also get clear_cache, which does you no good if you, say, set space_cache in fstab so that it is always set. This patch adds the break back in properly. Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/super.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 718b10de2049..66e4612a7916 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -244,6 +244,7 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) case Opt_space_cache: printk(KERN_INFO "btrfs: enabling disk space caching\n"); btrfs_set_opt(info->mount_opt, SPACE_CACHE); + break; case Opt_clear_cache: printk(KERN_INFO "btrfs: force clearing of disk cache\n"); btrfs_set_opt(info->mount_opt, CLEAR_CACHE); -- cgit v1.2.3 From 2a6b8daedaf3682bed3fc1d4e2390491f6e19c49 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Fri, 19 Nov 2010 01:36:10 +0000 Subject: btrfs: Check if dest_offset is block-size aligned before cloning file We've done the check for src_offset and src_length, and we should also check dest_offset, otherwise we'll corrupt the destination file: (After cloning file1 to file2 with unaligned dest_offset) # cat /mnt/file2 cat: /mnt/file2: Input/output error Signed-off-by: Li Zefan Signed-off-by: Chris Mason --- fs/btrfs/ioctl.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 463d91b4dd3a..81b47bd8a55a 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -1669,12 +1669,11 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, olen = len = src->i_size - off; /* if we extend to eof, continue to block boundary */ if (off + len == src->i_size) - len = ((src->i_size + bs-1) & ~(bs-1)) - - off; + len = ALIGN(src->i_size, bs) - off; /* verify the end result is block aligned */ - if ((off & (bs-1)) || - ((off + len) & (bs-1))) + if (!IS_ALIGNED(off, bs) || !IS_ALIGNED(off + len, bs) || + !IS_ALIGNED(destoff, bs)) goto out_unlock; /* do any pending delalloc/csum calc on src, one way or -- cgit v1.2.3 From 5f3888ff6f0b9dce60705765752b788a92557644 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Fri, 19 Nov 2010 01:36:34 +0000 Subject: btrfs: Set file size correctly in file clone Set src_offset = 0, src_length = 20K, dest_offset = 20K. 
And the original filesize of the dest file 'file2' is 30K: # ls -l /mnt/file2 -rw-r--r-- 1 root root 30720 Nov 18 16:42 /mnt/file2 Now clone file1 to file2; the dest file should be 40K, but it still shows 30K: # ls -l /mnt/file2 -rw-r--r-- 1 root root 30720 Nov 18 16:42 /mnt/file2 Signed-off-by: Li Zefan Signed-off-by: Chris Mason --- fs/btrfs/ioctl.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 81b47bd8a55a..6b4bfa72bf8d 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -1873,8 +1873,8 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, * but shouldn't round up the file size */ endoff = new_key.offset + datal; - if (endoff > off+olen) - endoff = off+olen; + if (endoff > destoff+olen) + endoff = destoff+olen; if (endoff > inode->i_size) btrfs_i_size_write(inode, endoff); -- cgit v1.2.3 From f209561ad83c5ffd561dc4bc3a3c90b704fe9231 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Fri, 19 Nov 2010 02:05:24 +0000 Subject: btrfs: Show device attr correctly for symlinks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Symlinks and files of other types show different device numbers, though they are on the same partition: $ touch tmp; ln -s tmp tmp2; stat tmp tmp2 File: `tmp' Size: 0 Blocks: 0 IO Block: 4096 regular empty file Device: 15h/21d Inode: 984027 Links: 1 --- snip --- File: `tmp2' -> `tmp' Size: 3 Blocks: 0 IO Block: 4096 symbolic link Device: 13h/19d Inode: 984028 Links: 1 Reported-by: Toke Høiland-Jørgensen Signed-off-by: Li Zefan Signed-off-by: Chris Mason --- fs/btrfs/inode.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index a47e4faa8c46..eed357ff6c99 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -7299,6 +7299,7 @@ static const struct inode_operations btrfs_symlink_inode_operations = { .readlink = generic_readlink, .follow_link = page_follow_link_light, .put_link = page_put_link, + .getattr = btrfs_getattr, .permission = btrfs_permission, .setxattr = btrfs_setxattr, .getxattr = btrfs_getxattr, -- cgit v1.2.3 From 0410c94aff109c02b6774a0ed00114987cda7ce5 Mon Sep 17 00:00:00 2001 From: Mariusz Kozlowski Date: Sat, 20 Nov 2010 12:03:07 +0000 Subject: btrfs: make 1-bit signed fields unsigned Fixes these sparse warnings: fs/btrfs/ctree.h:811:17: error: dubious one-bit signed bitfield fs/btrfs/ctree.h:812:20: error: dubious one-bit signed bitfield fs/btrfs/ctree.h:813:19: error: dubious one-bit signed bitfield Signed-off-by: Mariusz Kozlowski Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 8db9234f6b41..af52f6d7a4d8 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -808,9 +808,9 @@ struct btrfs_block_group_cache { int extents_thresh; int free_extents; int total_bitmaps; - int ro:1; - int dirty:1; - int iref:1; + unsigned int ro:1; + unsigned int dirty:1; + unsigned int iref:1; int disk_cache_state; -- cgit v1.2.3 From 2ede0daf01549cecf4bb0962c46dc47382047523 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 17 Nov 2010 18:54:54 +0000 Subject: Btrfs: handle NFS lookups properly People kept reporting NFS issues, specifically getting ESTALE a lot. 
I figured out how to reproduce the problem SERVER mkfs.btrfs /dev/sda1 mount /dev/sda1 /mnt/btrfs-test btrfs subvol create /mnt/btrfs-test/foo service nfs start CLIENT mount server:/mnt/btrfs /mnt/test cd /mnt/test/foo ls SERVER echo 3 > /proc/sys/vm/drop_caches CLIENT ls <-- get an ESTALE here This is because the standard way to look up a name in nfsd is to use readdir, and what it does is do a readdir on the parent directory looking for the inode of the child. So in this case the parent is / and the child is foo. Well subvols all have the same inode number, so doing a readdir of / looking for inode 256 will return '.', which obviously doesn't match foo. So instead we need to have our own .get_name so that we can find the right name. Our .get_name will either look up the inode backref or the root backref, whichever we're looking for, and return the name we find. Running the above reproducer with this patch results in everything acting the way it's supposed to. Thanks, Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/export.c | 76 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 76 insertions(+) diff --git a/fs/btrfs/export.c b/fs/btrfs/export.c index 951ef09b82f4..6f0444473594 100644 --- a/fs/btrfs/export.c +++ b/fs/btrfs/export.c @@ -232,9 +232,85 @@ fail: return ERR_PTR(ret); } +static int btrfs_get_name(struct dentry *parent, char *name, + struct dentry *child) +{ + struct inode *inode = child->d_inode; + struct inode *dir = parent->d_inode; + struct btrfs_path *path; + struct btrfs_root *root = BTRFS_I(dir)->root; + struct btrfs_inode_ref *iref; + struct btrfs_root_ref *rref; + struct extent_buffer *leaf; + unsigned long name_ptr; + struct btrfs_key key; + int name_len; + int ret; + + if (!dir || !inode) + return -EINVAL; + + if (!S_ISDIR(dir->i_mode)) + return -EINVAL; + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + path->leave_spinning = 1; + + if (inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) { + key.objectid = BTRFS_I(inode)->root->root_key.objectid; + key.type = BTRFS_ROOT_BACKREF_KEY; + key.offset = (u64)-1; + root = root->fs_info->tree_root; + } else { + key.objectid = inode->i_ino; + key.offset = dir->i_ino; + key.type = BTRFS_INODE_REF_KEY; + } + + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); + if (ret < 0) { + btrfs_free_path(path); + return ret; + } else if (ret > 0) { + if (inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) { + path->slots[0]--; + } else { + btrfs_free_path(path); + return -ENOENT; + } + } + leaf = path->nodes[0]; + + if (inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) { + rref = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_root_ref); + name_ptr = (unsigned long)(rref + 1); + name_len = btrfs_root_ref_name_len(leaf, rref); + } else { + iref = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_inode_ref); + name_ptr = (unsigned long)(iref + 1); + name_len = btrfs_inode_ref_name_len(leaf, iref); + } + + read_extent_buffer(leaf, name, name_ptr, name_len); + btrfs_free_path(path); + + /* + * have to add the null termination to make sure that reconnect_path + * gets the right len for strlen + */ + name[name_len] = '\0'; + + return 0; +} + const struct export_operations btrfs_export_ops = { .encode_fh = btrfs_encode_fh, .fh_to_dentry = btrfs_fh_to_dentry, .fh_to_parent = btrfs_fh_to_parent, .get_parent = btrfs_get_parent, + .get_name = btrfs_get_name, }; -- cgit v1.2.3 From 76195853903ca613ba722203db9b747d70478fc7 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 19 Nov 2010 02:18:02 +0000 
Subject: Btrfs: fix more ESTALE problems with NFS When creating new inodes we don't set up inode->i_generation. So if we generate an fh with a newly created inode we save the generation of 0, but if we flush the inode to disk and have to read it back when getting the inode on the server we'll have the right i_generation, so gens won't match and we get ESTALE. This patch properly sets inode->i_generation when we create the new inode and now I'm no longer getting ESTALE. Thanks, Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/inode.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index eed357ff6c99..fc22f556aa24 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -4501,6 +4501,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, BTRFS_I(inode)->index_cnt = 2; BTRFS_I(inode)->root = root; BTRFS_I(inode)->generation = trans->transid; + inode->i_generation = BTRFS_I(inode)->generation; btrfs_set_inode_space_info(root, inode); if (mode & S_IFDIR) -- cgit v1.2.3 From 6a912213046ecb6511fdf35531a0c7de3de963c9 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Sat, 20 Nov 2010 09:48:00 +0000 Subject: Btrfs: use dget_parent where we can UPDATED There are lots of places where we do dentry->d_parent->d_inode without holding the dentry->d_lock. This could cause problems with rename. So instead we need to use dget_parent() and hold the reference to the parent as long as we are going to use its inode and then dput it at the end. Signed-off-by: Josef Bacik Cc: raven@themaw.net Signed-off-by: Chris Mason --- fs/btrfs/inode.c | 9 ++++++--- fs/btrfs/ioctl.c | 20 ++++++++++++++++---- fs/btrfs/transaction.c | 5 ++++- fs/btrfs/tree-log.c | 21 +++++++++++++++++---- 4 files changed, 43 insertions(+), 12 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index fc22f556aa24..c0faf47d0cd9 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -4811,10 +4811,12 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, if (err) { drop_inode = 1; } else { + struct dentry *parent = dget_parent(dentry); btrfs_update_inode_block_group(trans, dir); err = btrfs_update_inode(trans, root, inode); BUG_ON(err); - btrfs_log_new_name(trans, inode, NULL, dentry->d_parent); + btrfs_log_new_name(trans, inode, NULL, parent); + dput(parent); } nr = trans->blocks_used; @@ -6768,8 +6770,9 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, BUG_ON(ret); if (old_inode->i_ino != BTRFS_FIRST_FREE_OBJECTID) { - btrfs_log_new_name(trans, old_inode, old_dir, - new_dentry->d_parent); + struct dentry *parent = dget_parent(new_dentry); + btrfs_log_new_name(trans, old_inode, old_dir, parent); + dput(parent); btrfs_end_log_trans(root); } out_fail: diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 6b4bfa72bf8d..f1c9bb4079ed 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -233,7 +233,8 @@ static noinline int create_subvol(struct btrfs_root *root, struct btrfs_inode_item *inode_item; struct extent_buffer *leaf; struct btrfs_root *new_root; - struct inode *dir = dentry->d_parent->d_inode; + struct dentry *parent = dget_parent(dentry); + struct inode *dir; int ret; int err; u64 objectid; @@ -242,8 +243,13 @@ static noinline int create_subvol(struct btrfs_root *root, ret = btrfs_find_free_objectid(NULL, root->fs_info->tree_root, 0, &objectid); - if (ret) + if (ret) { + dput(parent); return ret; + } + + dir = parent->d_inode; + /* * 1 - inode item * 2 - refs @@ -251,8 +257,10 @@ static noinline int 
create_subvol(struct btrfs_root *root, * 2 - dir items */ trans = btrfs_start_transaction(root, 6); - if (IS_ERR(trans)) + if (IS_ERR(trans)) { + dput(parent); return PTR_ERR(trans); + } leaf = btrfs_alloc_free_block(trans, root, root->leafsize, 0, objectid, NULL, 0, 0, 0); @@ -339,6 +347,7 @@ static noinline int create_subvol(struct btrfs_root *root, d_instantiate(dentry, btrfs_lookup_dentry(dir, dentry)); fail: + dput(parent); if (async_transid) { *async_transid = trans->transid; err = btrfs_commit_transaction_async(trans, root, 1); @@ -354,6 +363,7 @@ static int create_snapshot(struct btrfs_root *root, struct dentry *dentry, char *name, int namelen, u64 *async_transid) { struct inode *inode; + struct dentry *parent; struct btrfs_pending_snapshot *pending_snapshot; struct btrfs_trans_handle *trans; int ret; @@ -396,7 +406,9 @@ static int create_snapshot(struct btrfs_root *root, struct dentry *dentry, btrfs_orphan_cleanup(pending_snapshot->snap); - inode = btrfs_lookup_dentry(dentry->d_parent->d_inode, dentry); + parent = dget_parent(dentry); + inode = btrfs_lookup_dentry(parent->d_inode, dentry); + dput(parent); if (IS_ERR(inode)) { ret = PTR_ERR(inode); goto fail; diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 1fffbc017bdf..f50e931fc217 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -902,6 +902,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root *root = pending->root; struct btrfs_root *parent_root; struct inode *parent_inode; + struct dentry *parent; struct dentry *dentry; struct extent_buffer *tmp; struct extent_buffer *old; @@ -941,7 +942,8 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, trans->block_rsv = &pending->block_rsv; dentry = pending->dentry; - parent_inode = dentry->d_parent->d_inode; + parent = dget_parent(dentry); + parent_inode = parent->d_inode; parent_root = BTRFS_I(parent_inode)->root; record_root_in_trans(trans, parent_root); @@ -989,6 +991,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, parent_inode->i_ino, index, dentry->d_name.name, dentry->d_name.len); BUG_ON(ret); + dput(parent); key.offset = (u64)-1; pending->snap = btrfs_read_fs_root_no_name(root->fs_info, &key); diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index a29f19384a27..054744ac5719 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -2869,6 +2869,7 @@ static noinline int check_parent_dirs_for_sync(struct btrfs_trans_handle *trans, { int ret = 0; struct btrfs_root *root; + struct dentry *old_parent = NULL; /* * for regular files, if its inode is already on disk, we don't @@ -2910,10 +2911,13 @@ static noinline int check_parent_dirs_for_sync(struct btrfs_trans_handle *trans, if (IS_ROOT(parent)) break; - parent = parent->d_parent; + parent = dget_parent(parent); + dput(old_parent); + old_parent = parent; inode = parent->d_inode; } + dput(old_parent); out: return ret; } @@ -2945,6 +2949,7 @@ int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, { int inode_only = exists_only ? 
LOG_INODE_EXISTS : LOG_INODE_ALL; struct super_block *sb; + struct dentry *old_parent = NULL; int ret = 0; u64 last_committed = root->fs_info->last_trans_committed; @@ -3016,10 +3021,13 @@ int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, if (IS_ROOT(parent)) break; - parent = parent->d_parent; + parent = dget_parent(parent); + dput(old_parent); + old_parent = parent; } ret = 0; end_trans: + dput(old_parent); if (ret < 0) { BUG_ON(ret != -ENOSPC); root->fs_info->last_trans_log_full_commit = trans->transid; @@ -3039,8 +3047,13 @@ end_no_trans: int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct dentry *dentry) { - return btrfs_log_inode_parent(trans, root, dentry->d_inode, - dentry->d_parent, 0); + struct dentry *parent = dget_parent(dentry); + int ret; + + ret = btrfs_log_inode_parent(trans, root, dentry->d_inode, parent, 0); + dput(parent); + + return ret; } /* -- cgit v1.2.3 From 495e86779f4f319828bc10dfc0c9ac2161868077 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 19 Nov 2010 20:36:10 +0000 Subject: Btrfs: hold i_mutex when calling btrfs_log_dentry_safe Since we walk up the path logging all of the parts of the inode's path, we need to hold i_mutex to make sure that the inode is not renamed while we're logging everything. btrfs_log_dentry_safe does dget_parent and all of that jazz, but we may get unexpected results if the rename changes the inode's location while we're higher up the path logging those dentries, so do this for safety reasons. Thanks, Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/file.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index e354c33df082..c1faded5fca0 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1047,8 +1047,14 @@ out: if ((file->f_flags & O_DSYNC) || IS_SYNC(inode)) { trans = btrfs_start_transaction(root, 0); + if (IS_ERR(trans)) { + num_written = PTR_ERR(trans); + goto done; + } + mutex_lock(&inode->i_mutex); ret = btrfs_log_dentry_safe(trans, root, file->f_dentry); + mutex_unlock(&inode->i_mutex); if (ret == 0) { ret = btrfs_sync_log(trans, root); if (ret == 0) @@ -1067,6 +1073,7 @@ out: (start_pos + num_written - 1) >> PAGE_CACHE_SHIFT); } } +done: current->backing_dev_info = NULL; return num_written ? num_written : err; } -- cgit v1.2.3 From a1b075d28da563c5e2325577f282c042494254ba Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 19 Nov 2010 20:36:11 +0000 Subject: Btrfs: make btrfs_add_nondir take parent inode as an argument Everybody who calls btrfs_add_nondir just passes in the dentry of the new file and then dereferences dentry->d_parent->d_inode, but everybody who calls btrfs_add_nondir() is already passed the parent's inode. So instead of dereferencing dentry->d_parent, just make btrfs_add_nondir take the dir inode as an argument and pass that along so we don't have to worry about d_parent. 
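Both this change and the dget_parent() conversion above remove the same unsafe pattern. In sketch form, with generic names rather than a quote of any single hunk:

	/* unsafe: nothing pins the parent across the dereference,
	 * so a concurrent rename can change d_parent under us */
	struct inode *dir = dentry->d_parent->d_inode;

	/* safe: pin the parent, use it, then drop the reference */
	struct dentry *parent = dget_parent(dentry);
	struct inode *dir2 = parent->d_inode;
	/* ... use dir2 ... */
	dput(parent);

dget_parent() takes the reference under d_lock, so the parent dentry cannot be freed or swapped out between the lookup and the dput().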
Thanks, Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/inode.c | 38 ++++++++++++++++---------------------- 1 file changed, 16 insertions(+), 22 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index c0faf47d0cd9..37cc1776a5d7 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -4623,12 +4623,12 @@ int btrfs_add_link(struct btrfs_trans_handle *trans, } static int btrfs_add_nondir(struct btrfs_trans_handle *trans, - struct dentry *dentry, struct inode *inode, - int backref, u64 index) + struct inode *dir, struct dentry *dentry, + struct inode *inode, int backref, u64 index) { - int err = btrfs_add_link(trans, dentry->d_parent->d_inode, - inode, dentry->d_name.name, - dentry->d_name.len, backref, index); + int err = btrfs_add_link(trans, dir, inode, + dentry->d_name.name, dentry->d_name.len, + backref, index); if (!err) { d_instantiate(dentry, inode); return 0; @@ -4669,8 +4669,7 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry, btrfs_set_trans_block_group(trans, dir); inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, - dentry->d_name.len, - dentry->d_parent->d_inode->i_ino, objectid, + dentry->d_name.len, dir->i_ino, objectid, BTRFS_I(dir)->block_group, mode, &index); err = PTR_ERR(inode); if (IS_ERR(inode)) @@ -4683,7 +4682,7 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry, } btrfs_set_trans_block_group(trans, inode); - err = btrfs_add_nondir(trans, dentry, inode, 0, index); + err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index); if (err) drop_inode = 1; else { @@ -4731,10 +4730,8 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, btrfs_set_trans_block_group(trans, dir); inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, - dentry->d_name.len, - dentry->d_parent->d_inode->i_ino, - objectid, BTRFS_I(dir)->block_group, mode, - &index); + dentry->d_name.len, dir->i_ino, objectid, + BTRFS_I(dir)->block_group, mode, &index); err = PTR_ERR(inode); if (IS_ERR(inode)) goto out_unlock; @@ -4746,7 +4743,7 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, } btrfs_set_trans_block_group(trans, inode); - err = btrfs_add_nondir(trans, dentry, inode, 0, index); + err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index); if (err) drop_inode = 1; else { @@ -4806,7 +4803,7 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, btrfs_set_trans_block_group(trans, dir); atomic_inc(&inode->i_count); - err = btrfs_add_nondir(trans, dentry, inode, 1, index); + err = btrfs_add_nondir(trans, dir, dentry, inode, 1, index); if (err) { drop_inode = 1; @@ -4856,8 +4853,7 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) btrfs_set_trans_block_group(trans, dir); inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, - dentry->d_name.len, - dentry->d_parent->d_inode->i_ino, objectid, + dentry->d_name.len, dir->i_ino, objectid, BTRFS_I(dir)->block_group, S_IFDIR | mode, &index); if (IS_ERR(inode)) { @@ -4880,9 +4876,8 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) if (err) goto out_fail; - err = btrfs_add_link(trans, dentry->d_parent->d_inode, - inode, dentry->d_name.name, - dentry->d_name.len, 0, index); + err = btrfs_add_link(trans, dir, inode, dentry->d_name.name, + dentry->d_name.len, 0, index); if (err) goto out_fail; @@ -6922,8 +6917,7 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, btrfs_set_trans_block_group(trans, dir); inode = btrfs_new_inode(trans, root, dir, 
dentry->d_name.name, - dentry->d_name.len, - dentry->d_parent->d_inode->i_ino, objectid, + dentry->d_name.len, dir->i_ino, objectid, BTRFS_I(dir)->block_group, S_IFLNK|S_IRWXUGO, &index); err = PTR_ERR(inode); @@ -6937,7 +6931,7 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, } btrfs_set_trans_block_group(trans, inode); - err = btrfs_add_nondir(trans, dentry, inode, 0, index); + err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index); if (err) drop_inode = 1; else { -- cgit v1.2.3 From 45f49bce99d008d6864a20324548f35936ba46fb Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Sun, 21 Nov 2010 22:27:44 -0500 Subject: Btrfs: avoid NULL pointer deref in try_release_extent_buffer If we fail to find a pointer in the radix tree, don't try to deref the NULL one we do have. Signed-off-by: Chris Mason --- fs/btrfs/extent_io.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index f60aa3c35c23..143d3f541d64 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -3837,8 +3837,10 @@ int try_release_extent_buffer(struct extent_io_tree *tree, struct page *page) spin_lock(&tree->buffer_lock); eb = radix_tree_lookup(&tree->buffer, start >> PAGE_CACHE_SHIFT); - if (!eb) - goto out; + if (!eb) { + spin_unlock(&tree->buffer_lock); + return ret; + } if (test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)) { ret = 0; -- cgit v1.2.3 From 8575d93386d6ce9a3d4961134018d4e6c6bed618 Mon Sep 17 00:00:00 2001 From: Vasiliy Kulikov Date: Sun, 21 Nov 2010 20:40:21 +0300 Subject: ASoC: s3c24xx: test wrong variable After clk_get() mclk is checked three times instead of mout_epll and sclk_spdif checks. Signed-off-by: Vasiliy Kulikov Acked-by: Liam Girdwood Signed-off-by: Mark Brown --- sound/soc/s3c24xx/smdk_spdif.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/s3c24xx/smdk_spdif.c b/sound/soc/s3c24xx/smdk_spdif.c index f31d22ad7c88..c8bd90488a87 100644 --- a/sound/soc/s3c24xx/smdk_spdif.c +++ b/sound/soc/s3c24xx/smdk_spdif.c @@ -38,7 +38,7 @@ static int set_audio_clock_heirachy(struct platform_device *pdev) } mout_epll = clk_get(NULL, "mout_epll"); - if (IS_ERR(fout_epll)) { + if (IS_ERR(mout_epll)) { printk(KERN_WARNING "%s: Cannot find mout_epll.\n", __func__); ret = -EINVAL; @@ -54,7 +54,7 @@ static int set_audio_clock_heirachy(struct platform_device *pdev) } sclk_spdif = clk_get(NULL, "sclk_spdif"); - if (IS_ERR(fout_epll)) { + if (IS_ERR(sclk_spdif)) { printk(KERN_WARNING "%s: Cannot find sclk_spdif.\n", __func__); ret = -EINVAL; -- cgit v1.2.3 From 13a2e06c5898d27aadabfdb9830169101b21432f Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Mon, 22 Nov 2010 08:20:54 +0800 Subject: ASoC: stac9766 - set reg_cache_default to stac9766_reg Looks like this is missing during multi-component conversion. 
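For context, a minimal sketch of where reg_cache_default fits in a codec descriptor; this is an invented example, not the stac9766 source, and the register defaults below are placeholders:

	/* hypothetical AC'97-style codec: 16-bit registers at even addresses */
	static const u16 example_reg[] = {
		0x6a90, 0x8000, 0x8000, 0x8000,	/* made-up reset defaults */
	};

	static struct snd_soc_codec_driver soc_codec_dev_example = {
		.reg_cache_size    = sizeof(example_reg),
		.reg_word_size     = sizeof(u16),
		.reg_cache_step    = 2,
		.reg_cache_default = example_reg,	/* the field this patch restores */
	};

Without .reg_cache_default the core falls back to a zeroed register cache, so the first cache sync would presumably write values that do not match the chip's reset state, which is the breakage being fixed here.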
Signed-off-by: Axel Lin Acked-by: Liam Girdwood Signed-off-by: Mark Brown --- sound/soc/codecs/stac9766.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/codecs/stac9766.c b/sound/soc/codecs/stac9766.c index 00d67cc8e206..061f9e5a497b 100644 --- a/sound/soc/codecs/stac9766.c +++ b/sound/soc/codecs/stac9766.c @@ -383,6 +383,7 @@ static struct snd_soc_codec_driver soc_codec_dev_stac9766 = { .reg_cache_size = sizeof(stac9766_reg), .reg_word_size = sizeof(u16), .reg_cache_step = 2, + .reg_cache_default = stac9766_reg, }; static __devinit int stac9766_probe(struct platform_device *pdev) -- cgit v1.2.3 From f570e1dd8469d39420f406a4f5442c270b1e759e Mon Sep 17 00:00:00 2001 From: Tracey Dent Date: Sun, 7 Nov 2010 09:43:33 -0500 Subject: EDAC: Remove deprecated kbuild goal definitions Change EDAC's Makefile to use -y instead of -objs because -objs is deprecated and not mentioned in Documentation/kbuild/makefiles.txt. [bp: Fixup commit message] [bp: Fixup indentation] Signed-off-by: Tracey Dent Signed-off-by: Borislav Petkov --- drivers/edac/Makefile | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile index b3781399b38a..ba2898b3639b 100644 --- a/drivers/edac/Makefile +++ b/drivers/edac/Makefile @@ -10,16 +10,16 @@ obj-$(CONFIG_EDAC) := edac_stub.o obj-$(CONFIG_EDAC_MM_EDAC) += edac_core.o obj-$(CONFIG_EDAC_MCE) += edac_mce.o -edac_core-objs := edac_mc.o edac_device.o edac_mc_sysfs.o edac_pci_sysfs.o -edac_core-objs += edac_module.o edac_device_sysfs.o +edac_core-y := edac_mc.o edac_device.o edac_mc_sysfs.o edac_pci_sysfs.o +edac_core-y += edac_module.o edac_device_sysfs.o ifdef CONFIG_PCI -edac_core-objs += edac_pci.o edac_pci_sysfs.o +edac_core-y += edac_pci.o edac_pci_sysfs.o endif obj-$(CONFIG_EDAC_MCE_INJ) += mce_amd_inj.o -edac_mce_amd-objs := mce_amd.o +edac_mce_amd-y := mce_amd.o obj-$(CONFIG_EDAC_DECODE_MCE) += edac_mce_amd.o obj-$(CONFIG_EDAC_AMD76X) += amd76x_edac.o -- cgit v1.2.3 From df4b2a30e0b9e553abfd63ee75dbbd66be80f01a Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Thu, 18 Nov 2010 22:05:43 -0500 Subject: EDAC, MCE: Fix edac_init_mce_inject error handling Otherwise, variable i will be -1 inside the latest iteration of the while loop. Signed-off-by: Axel Lin Signed-off-by: Borislav Petkov --- drivers/edac/mce_amd_inj.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/edac/mce_amd_inj.c b/drivers/edac/mce_amd_inj.c index 8d0688f36d4c..39faded3cadd 100644 --- a/drivers/edac/mce_amd_inj.c +++ b/drivers/edac/mce_amd_inj.c @@ -139,7 +139,7 @@ static int __init edac_init_mce_inject(void) return 0; err_sysfs_create: - while (i-- >= 0) + while (--i >= 0) sysfs_remove_file(mce_kobj, &sysfs_attrs[i]->attr); kobject_del(mce_kobj); -- cgit v1.2.3 From c1a3a4b90a5a47adcca0e587f5d7e9ea61329b26 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 22 Nov 2010 14:01:55 -0200 Subject: perf record: Handle restrictive permissions in /proc/{kallsyms,modules} The 59365d1 commit, even being reverted by 33e0d57, showed a non robust behavior in 'perf record': it really should just warn the user that some functionality will not be available. The new behavior then becomes: [acme@felicio linux]$ ls -la /proc/{kallsyms,modules} -r-------- 1 root root 0 Nov 22 12:19 /proc/kallsyms -r-------- 1 root root 0 Nov 22 12:19 /proc/modules [acme@felicio linux]$ perf record ls -R > /dev/null Couldn't record kernel reference relocation symbol Symbol resolution may be skewed if relocation was used (e.g. 
kexec). Check /proc/kallsyms permission or run as root. [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.004 MB perf.data (~161 samples) ] [acme@felicio linux]$ perf report --stdio [kernel.kallsyms] with build id 77b05e00e64e4de1c9347d83879779b540d69f00 not found, continuing without symbols # Events: 98 cycles # # Overhead Command Shared Object Symbol # ........ ....... ............... .................... # 48.26% ls [kernel] [k] ffffffff8102b92b 22.49% ls libc-2.12.90.so [.] __strlen_sse2 8.35% ls libc-2.12.90.so [.] __GI___strcoll_l 8.17% ls ls [.] 11580 3.35% ls libc-2.12.90.so [.] _IO_new_file_xsputn 3.33% ls libc-2.12.90.so [.] _int_malloc 1.88% ls libc-2.12.90.so [.] _int_free 0.84% ls libc-2.12.90.so [.] malloc_consolidate 0.84% ls libc-2.12.90.so [.] __readdir64 0.83% ls ls [.] strlen@plt 0.83% ls libc-2.12.90.so [.] __GI_fwrite_unlocked 0.83% ls libc-2.12.90.so [.] __memcpy_sse2 # # (For a higher level overview, try: perf report --sort comm,dso) # [acme@felicio linux]$ It still has the build-ids for DSOs in the maps with hits: [acme@felicio linux]$ perf buildid-list 77b05e00e64e4de1c9347d83879779b540d69f00 [kernel.kallsyms] 09c4a431a4a8b648fcfc2c2bdda70f56050ddff1 /bin/ls af75ea9ad951d25e0f038901a11b3846dccb29a4 /lib64/libc-2.12.90.so [acme@felicio linux]$ That can be used in another machine to resolve kernel symbols. Cc: Eugene Teo Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Jesper Juhl Cc: Marcus Meissner Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Sarah Sharp Cc: Stephane Eranian Cc: Tejun Heo Cc: Tom Zanussi LKML-Reference: Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-record.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 93bd2ff001fb..e2c2de201eec 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -697,17 +697,18 @@ static int __cmd_record(int argc, const char **argv) if (err < 0) err = event__synthesize_kernel_mmap(process_synthesized_event, session, machine, "_stext"); - if (err < 0) { - pr_err("Couldn't record kernel reference relocation symbol.\n"); - return err; - } + if (err < 0) + pr_err("Couldn't record kernel reference relocation symbol\n" + "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n" + "Check /proc/kallsyms permission or run as root.\n"); err = event__synthesize_modules(process_synthesized_event, session, machine); - if (err < 0) { - pr_err("Couldn't record kernel reference relocation symbol.\n"); - return err; - } + if (err < 0) + pr_err("Couldn't record kernel module information.\n" + "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n" + "Check /proc/modules permission or run as root.\n"); + if (perf_guest) perf_session__process_machines(session, event__synthesize_guest_os); -- cgit v1.2.3 From 5fc43978a79e8021c189660ab63249fd29c5fb32 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 20 Nov 2010 11:13:31 -0500 Subject: SUNRPC: Fix an infinite loop in call_refresh/call_refreshresult If the rpcauth_refreshcred() call returns an error other than EACCES, ENOMEM or ETIMEDOUT, we currently end up looping forever between call_refresh and call_refreshresult. The correct thing to do here is to exit on all errors except EAGAIN and ETIMEDOUT, for which case we retry 3 times, then return EACCES. 
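In outline, the new call_refreshresult() state machine (condensed from the hunk below):

	task->tk_action = call_refresh;		/* default: try the refresh again */
	switch (status) {
	case 0:
		if (rpcauth_uptodatecred(task))
			task->tk_action = call_allocate;	/* cred is fresh: done */
		return;
	case -ETIMEDOUT:
		rpc_delay(task, 3*HZ);		/* back off, then fall through */
	case -EAGAIN:
		status = -EACCES;		/* reported once retries run out */
		if (!task->tk_cred_retry)
			break;
		task->tk_cred_retry--;		/* the bound that ends the old loop */
		return;
	}
	rpc_exit(task, status);			/* any other error is final */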
Signed-off-by: Trond Myklebust --- net/sunrpc/clnt.c | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 9dab9573be41..92ce94f5146b 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -989,20 +989,26 @@ call_refreshresult(struct rpc_task *task) dprint_status(task); task->tk_status = 0; - task->tk_action = call_allocate; - if (status >= 0 && rpcauth_uptodatecred(task)) - return; + task->tk_action = call_refresh; switch (status) { - case -EACCES: - rpc_exit(task, -EACCES); - return; - case -ENOMEM: - rpc_exit(task, -ENOMEM); + case 0: + if (rpcauth_uptodatecred(task)) + task->tk_action = call_allocate; return; case -ETIMEDOUT: rpc_delay(task, 3*HZ); + case -EAGAIN: + status = -EACCES; + if (!task->tk_cred_retry) + break; + task->tk_cred_retry--; + dprintk("RPC: %5u %s: retry refresh creds\n", + task->tk_pid, __func__); + return; } - task->tk_action = call_refresh; + dprintk("RPC: %5u %s: refresh creds failed with error %d\n", + task->tk_pid, __func__, status); + rpc_exit(task, status); } /* -- cgit v1.2.3 From b47d19de2c714020ba8f5545a6e7d4968f37eb45 Mon Sep 17 00:00:00 2001 From: Arun Bharadwaj Date: Thu, 18 Nov 2010 10:36:43 +0000 Subject: Pure nfs client performance using odirect. When an application opens a file with the O_DIRECT flag, if the size of the data that is written is equal to wsize, the client sends a WRITE RPC with the stable flag set to UNSTABLE followed by a single COMMIT RPC rather than sending a single WRITE RPC with the stable flag set to FILE_SYNC. This is a bug; this patch fixes it. Signed-off-by: Arun R Bharadwaj Signed-off-by: Trond Myklebust --- fs/nfs/direct.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 84d3c8b90206..e6ace0d93c71 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -867,7 +867,7 @@ static ssize_t nfs_direct_write(struct kiocb *iocb, const struct iovec *iov, goto out; nfs_alloc_commit_data(dreq); - if (dreq->commit_data == NULL || count < wsize) + if (dreq->commit_data == NULL || count <= wsize) sync = NFS_FILE_SYNC; dreq->inode = inode; -- cgit v1.2.3 From 463a376eae1c92a66c912af539bfd4bbefa37673 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 20 Nov 2010 12:22:20 -0500 Subject: NFS: Buffer overflow in ->decode_dirent() should not be fatal Overflowing the buffer in the readdir ->decode_dirent() should not lead to a fatal error, but rather to an attempt to reread the record in question. 
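The distinction matters to the caller; a sketch of the intended semantics (illustrative only, not the in-tree call site):

	p = decode_dirent(&stream, entry, server, plus);
	if (IS_ERR(p)) {
		status = PTR_ERR(p);
		if (status == -EAGAIN)
			goto reread;	/* record overran this buffer: refill and retry */
		goto err;		/* anything else still aborts the readdir */
	}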
Signed-off-by: Trond Myklebust --- fs/nfs/nfs2xdr.c | 2 +- fs/nfs/nfs3xdr.c | 2 +- fs/nfs/nfs4xdr.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c index 2563f765c9b4..ab5937734f0b 100644 --- a/fs/nfs/nfs2xdr.c +++ b/fs/nfs/nfs2xdr.c @@ -495,7 +495,7 @@ nfs_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, struct nfs_se out_overflow: print_overflow_msg(__func__, xdr); - return ERR_PTR(-EIO); + return ERR_PTR(-EAGAIN); } /* diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c index 748dc91a4a14..e79e4f5f5d53 100644 --- a/fs/nfs/nfs3xdr.c +++ b/fs/nfs/nfs3xdr.c @@ -656,7 +656,7 @@ nfs3_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, struct nfs_s out_overflow: print_overflow_msg(__func__, xdr); out_overflow_exit: - return ERR_PTR(-EIO); + return ERR_PTR(-EAGAIN); } /* diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index b7a204ff6fe1..a3b39cbc90f9 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -6221,7 +6221,7 @@ __be32 *nfs4_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, out_overflow: print_overflow_msg(__func__, xdr); - return ERR_PTR(-EIO); + return ERR_PTR(-EAGAIN); } /* -- cgit v1.2.3 From 5c346854d8ce6ca91931f8fc9177934257a667d0 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 20 Nov 2010 12:43:45 -0500 Subject: NFS: Assume eof if the server returns no readdir records Some servers are known to be buggy w.r.t. this. Deal with them... Signed-off-by: Trond Myklebust --- fs/nfs/dir.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 662df2a5fad5..2789cb3fc425 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -466,8 +466,9 @@ int nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct nfs_entry *en struct xdr_stream stream; struct xdr_buf buf; __be32 *ptr = xdr_page; - int status; struct nfs_cache_array *array; + unsigned int count = 0; + int status; buf.head->iov_base = xdr_page; buf.head->iov_len = buflen; @@ -488,6 +489,8 @@ int nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct nfs_entry *en break; } + count++; + if (desc->plus == 1) nfs_prime_dcache(desc->file->f_path.dentry, entry); @@ -496,13 +499,14 @@ int nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct nfs_entry *en break; } while (!entry->eof); - if (status == -EBADCOOKIE && entry->eof) { + if (count == 0 || (status == -EBADCOOKIE && entry->eof == 1)) { array = nfs_readdir_get_array(page); if (!IS_ERR(array)) { array->eof_index = array->size; status = 0; nfs_readdir_release_array(page); - } + } else + status = PTR_ERR(array); } return status; } -- cgit v1.2.3 From e7c58e974a0318fcca5368e7b3570e10e9ae9028 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 20 Nov 2010 13:22:24 -0500 Subject: NFS: Fix a page leak in nfs_do_filldir() nfs_do_filldir() must always free desc->page when it is done, otherwise we end up leaking the page. Also remove unused variable 'dentry'. 
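The leak, in miniature (condensed from the hunk below): the early IS_ERR() return skipped the page release, so every exit path is now funneled through a single cleanup label:

	array = nfs_readdir_get_array(desc->page);
	if (IS_ERR(array)) {
		res = PTR_ERR(array);
		goto out;			/* was a bare return: page leaked */
	}
	/* ... emit entries ... */
	nfs_readdir_release_array(desc->page);
out:
	cache_page_release(desc);		/* now runs on every path */
	return res;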
Signed-off-by: Trond Myklebust --- fs/nfs/dir.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 2789cb3fc425..42e66e961d73 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -701,11 +701,12 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc, void *dirent, int res = 0; struct nfs_cache_array *array = NULL; unsigned int d_type = DT_UNKNOWN; - struct dentry *dentry = NULL; array = nfs_readdir_get_array(desc->page); - if (IS_ERR(array)) - return PTR_ERR(array); + if (IS_ERR(array)) { + res = PTR_ERR(array); + goto out; + } for (i = desc->cache_entry_index; i < array->size; i++) { d_type = DT_UNKNOWN; @@ -726,9 +727,8 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc, void *dirent, desc->eof = 1; nfs_readdir_release_array(desc->page); +out: cache_page_release(desc); - if (dentry != NULL) - dput(dentry); dfprintk(DIRCACHE, "NFS: nfs_do_filldir() filling ended @ cookie %Lu; returning = %d\n", (unsigned long long)*desc->dir_cookie, res); return res; -- cgit v1.2.3 From 7a8e1dc34f52fd2927dbf7e520d7cd8eadc51336 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 20 Nov 2010 13:24:46 -0500 Subject: NFS: Fix a page leak in uncached_readdir() Signed-off-by: Trond Myklebust --- fs/nfs/dir.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 42e66e961d73..353f47c31b1d 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -763,13 +763,14 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent, goto out; } + desc->page_index = 0; + desc->page = page; + if (nfs_readdir_xdr_to_array(desc, page, inode) == -1) { status = -EIO; goto out_release; } - desc->page_index = 0; - desc->page = page; status = nfs_do_filldir(desc, dirent, filldir); out: -- cgit v1.2.3 From 85f8607e163f8d281fb407357279cb4ac6df12e6 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 20 Nov 2010 13:24:49 -0500 Subject: NFS: Fix the error handling in "uncached_readdir()" Currently, uncached_readdir() is broken because it fails to handle the results from nfs_readdir_xdr_to_array() correctly. Signed-off-by: Trond Myklebust --- fs/nfs/dir.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 353f47c31b1d..2492bacf68a7 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -766,10 +766,9 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent, desc->page_index = 0; desc->page = page; - if (nfs_readdir_xdr_to_array(desc, page, inode) == -1) { - status = -EIO; + status = nfs_readdir_xdr_to_array(desc, page, inode); + if (status < 0) goto out_release; - } status = nfs_do_filldir(desc, dirent, filldir); -- cgit v1.2.3 From ece0b4233b6b915d1f63add2bd9f2733aec6317a Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 20 Nov 2010 13:55:33 -0500 Subject: NFS: Don't ignore errors from nfs_do_filldir() We should ignore the errors from the filldir callback, and just interpret them as meaning we should exit; however, we should definitely pass back ENOMEM errors. 
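This relies on the filldir contract: a negative return from the callback means "stop filling, the caller's buffer is full", not "the readdir failed", so the loop just marks the end of this pass instead of propagating the value; roughly (see the hunk below):

	if (filldir(dirent, name, len, pos, ino, d_type) < 0) {
		desc->eof = 1;	/* stop emitting entries; not an error */
		break;
	}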
Signed-off-by: Trond Myklebust --- fs/nfs/dir.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 2492bacf68a7..ddc2e439702b 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -709,13 +709,15 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc, void *dirent, } for (i = desc->cache_entry_index; i < array->size; i++) { + struct nfs_cache_array_entry *ent; d_type = DT_UNKNOWN; - res = filldir(dirent, array->array[i].string.name, - array->array[i].string.len, file->f_pos, - nfs_compat_user_ino64(array->array[i].ino), d_type); - if (res < 0) + ent = &array->array[i]; + if (filldir(dirent, ent->string.name, ent->string.len, + file->f_pos, nfs_compat_user_ino64(ent->ino), d_type) < 0) { + desc->eof = 1; break; + } file->f_pos++; desc->cache_entry_index = i; if (i < (array->size-1)) @@ -820,14 +822,14 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir) res = readdir_search_pagecache(desc); if (res == -EBADCOOKIE) { + res = 0; /* This means either end of directory */ if (*desc->dir_cookie && desc->eof == 0) { /* Or that the server has 'lost' a cookie */ res = uncached_readdir(desc, dirent, filldir); - if (res >= 0) + if (res == 0) continue; } - res = 0; break; } if (res == -ETOOSMALL && desc->plus) { @@ -842,10 +844,8 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir) break; res = nfs_do_filldir(desc, dirent, filldir); - if (res < 0) { - res = 0; + if (res < 0) break; - } } out: nfs_unblock_sillyrename(dentry); -- cgit v1.2.3 From 3020093f578fb6c9acc6914dfd887a1ebd1db659 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 20 Nov 2010 15:18:22 -0500 Subject: NFS: Correct the array bound calculation in nfs_readdir_add_to_array It looks as if the array size calculation in MAX_READDIR_ARRAY does not take the alignment of struct nfs_cache_array_entry into account. Signed-off-by: Trond Myklebust --- fs/nfs/dir.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index ddc2e439702b..ced7291cc5f8 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -171,8 +171,6 @@ struct nfs_cache_array { struct nfs_cache_array_entry array[0]; }; -#define MAX_READDIR_ARRAY ((PAGE_SIZE - sizeof(struct nfs_cache_array)) / sizeof(struct nfs_cache_array_entry)) - typedef __be32 * (*decode_dirent_t)(struct xdr_stream *, struct nfs_entry *, struct nfs_server *, int); typedef struct { struct file *file; @@ -257,11 +255,14 @@ int nfs_readdir_add_to_array(struct nfs_entry *entry, struct page *page) if (IS_ERR(array)) return PTR_ERR(array); + + cache_entry = &array->array[array->size]; + + /* Check that this entry lies within the page bounds */ ret = -ENOSPC; - if (array->size >= MAX_READDIR_ARRAY) + if ((char *)&cache_entry[1] - (char *)page_address(page) > PAGE_SIZE) goto out; - cache_entry = &array->array[array->size]; cache_entry->cookie = entry->prev_cookie; cache_entry->ino = entry->ino; ret = nfs_readdir_make_qstr(&cache_entry->string, entry->name, entry->len); -- cgit v1.2.3 From 0b26a0bf6ff398185546432420bb772bcfdf8d94 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 20 Nov 2010 14:26:44 -0500 Subject: NFS: Ensure we return the dirent->d_type when it is known Store the dirent->d_type in the struct nfs_cache_array_entry so that we can use it in getdents() calls. This fixes a regression with the new readdir code. 
Signed-off-by: Trond Myklebust --- fs/nfs/dir.c | 7 ++++--- fs/nfs/internal.h | 9 +++++++++ fs/nfs/nfs2xdr.c | 2 ++ fs/nfs/nfs3xdr.c | 2 ++ fs/nfs/nfs4xdr.c | 4 ++++ include/linux/nfs_xdr.h | 1 + 6 files changed, 22 insertions(+), 3 deletions(-) diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index ced7291cc5f8..8ea4a4180a87 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -162,6 +162,7 @@ struct nfs_cache_array_entry { u64 cookie; u64 ino; struct qstr string; + unsigned char d_type; }; struct nfs_cache_array { @@ -265,6 +266,7 @@ int nfs_readdir_add_to_array(struct nfs_entry *entry, struct page *page) cache_entry->cookie = entry->prev_cookie; cache_entry->ino = entry->ino; + cache_entry->d_type = entry->d_type; ret = nfs_readdir_make_qstr(&cache_entry->string, entry->name, entry->len); if (ret) goto out; @@ -701,7 +703,6 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc, void *dirent, int i = 0; int res = 0; struct nfs_cache_array *array = NULL; - unsigned int d_type = DT_UNKNOWN; array = nfs_readdir_get_array(desc->page); if (IS_ERR(array)) { @@ -711,11 +712,11 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc, void *dirent, for (i = desc->cache_entry_index; i < array->size; i++) { struct nfs_cache_array_entry *ent; - d_type = DT_UNKNOWN; ent = &array->array[i]; if (filldir(dirent, ent->string.name, ent->string.len, - file->f_pos, nfs_compat_user_ino64(ent->ino), d_type) < 0) { + file->f_pos, nfs_compat_user_ino64(ent->ino), + ent->d_type) < 0) { desc->eof = 1; break; } diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index db08ff3ff454..e6356b750b77 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -361,6 +361,15 @@ unsigned int nfs_page_length(struct page *page) return 0; } +/* + * Convert a umode to a dirent->d_type + */ +static inline +unsigned char nfs_umode_to_dtype(umode_t mode) +{ + return (mode >> 12) & 15; +} + /* * Determine the number of pages in an array of length 'len' and * with a base offset of 'base' diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c index ab5937734f0b..5914a1911c95 100644 --- a/fs/nfs/nfs2xdr.c +++ b/fs/nfs/nfs2xdr.c @@ -485,6 +485,8 @@ nfs_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, struct nfs_se entry->prev_cookie = entry->cookie; entry->cookie = ntohl(*p++); + entry->d_type = DT_UNKNOWN; + p = xdr_inline_peek(xdr, 8); if (p != NULL) entry->eof = !p[0] && p[1]; diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c index e79e4f5f5d53..f6cc60f06dac 100644 --- a/fs/nfs/nfs3xdr.c +++ b/fs/nfs/nfs3xdr.c @@ -622,11 +622,13 @@ nfs3_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, struct nfs_s entry->prev_cookie = entry->cookie; p = xdr_decode_hyper(p, &entry->cookie); + entry->d_type = DT_UNKNOWN; if (plus) { entry->fattr->valid = 0; p = xdr_decode_post_op_attr_stream(xdr, entry->fattr); if (IS_ERR(p)) goto out_overflow_exit; + entry->d_type = nfs_umode_to_dtype(entry->fattr->mode); /* In fact, a post_op_fh3: */ p = xdr_inline_decode(xdr, 4); if (unlikely(!p)) diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index a3b39cbc90f9..9f1826b012e6 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -6208,6 +6208,10 @@ __be32 *nfs4_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, if (entry->fattr->valid & NFS_ATTR_FATTR_FILEID) entry->ino = entry->fattr->fileid; + entry->d_type = DT_UNKNOWN; + if (entry->fattr->valid & NFS_ATTR_FATTR_TYPE) + entry->d_type = nfs_umode_to_dtype(entry->fattr->mode); + if (verify_attr_len(xdr, p, len) < 0) goto out_overflow; diff --git a/include/linux/nfs_xdr.h 
b/include/linux/nfs_xdr.h index ba6cc8f223c9..80f07198a31a 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -483,6 +483,7 @@ struct nfs_entry { int eof; struct nfs_fh * fh; struct nfs_fattr * fattr; + unsigned char d_type; }; /* -- cgit v1.2.3 From 15ec44611904be0dcc97b84c29fbf964e5e2b36f Mon Sep 17 00:00:00 2001 From: Philip Rakity Date: Fri, 19 Nov 2010 16:48:39 -0500 Subject: mmc: sdhci: 8-bit bus width changes We now: * check for a v3 controller before setting 8-bit bus width * offer a callback for platform code to switch to 8-bit mode, which allows non-v3 controllers to support it * rely on mmc->caps |= MMC_CAP_8_BIT_DATA; in platform code to specify that the board designers have indeed brought out all the pins for 8-bit to the slot. We were previously relying only on whether the *controller* supported 8-bit, which doesn't tell us anything about the pin configuration in the board design. This fixes the MMC card regression reported by Maxim Levitsky here: http://thread.gmane.org/gmane.linux.kernel.mmc/4336 by no longer assuming that 8-bit works by default. Signed-off-by: Philip Rakity Tested-by: Giuseppe Cavallaro Signed-off-by: Chris Ball --- arch/arm/plat-pxa/include/plat/sdhci.h | 3 +++ drivers/mmc/host/sdhci-pxa.c | 4 ++++ drivers/mmc/host/sdhci.c | 44 +++++++++++++++++++++++++--------- drivers/mmc/host/sdhci.h | 5 +++- 4 files changed, 44 insertions(+), 12 deletions(-) diff --git a/arch/arm/plat-pxa/include/plat/sdhci.h b/arch/arm/plat-pxa/include/plat/sdhci.h index e49c5b6fc4e2..1ab332e37d7d 100644 --- a/arch/arm/plat-pxa/include/plat/sdhci.h +++ b/arch/arm/plat-pxa/include/plat/sdhci.h @@ -17,6 +17,9 @@ /* Require clock free running */ #define PXA_FLAG_DISABLE_CLOCK_GATING (1<<0) +/* Board design supports 8-bit data on SD/SDIO BUS */ +#define PXA_FLAG_SD_8_BIT_CAPABLE_SLOT (1<<2) + /* * struct pxa_sdhci_platdata() - Platform device data for PXA SDHCI * @max_speed: the maximum speed supported diff --git a/drivers/mmc/host/sdhci-pxa.c b/drivers/mmc/host/sdhci-pxa.c index fc406ac5d193..5a61208cbc66 100644 --- a/drivers/mmc/host/sdhci-pxa.c +++ b/drivers/mmc/host/sdhci-pxa.c @@ -141,6 +141,10 @@ static int __devinit sdhci_pxa_probe(struct platform_device *pdev) if (pdata->quirks) host->quirks |= pdata->quirks; + /* If slot design supports 8 bit data, indicate this to MMC. */ + if (pdata->flags & PXA_FLAG_SD_8_BIT_CAPABLE_SLOT) + host->mmc->caps |= MMC_CAP_8_BIT_DATA; + ret = sdhci_add_host(host); if (ret) { dev_err(&pdev->dev, "failed to add host\n"); diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index 154cbf83c1ab..a25db426c910 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c @@ -1185,17 +1185,31 @@ static void sdhci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) if (host->ops->platform_send_init_74_clocks) host->ops->platform_send_init_74_clocks(host, ios->power_mode); - ctrl = sdhci_readb(host, SDHCI_HOST_CONTROL); - - if (ios->bus_width == MMC_BUS_WIDTH_8) - ctrl |= SDHCI_CTRL_8BITBUS; - else - ctrl &= ~SDHCI_CTRL_8BITBUS; + /* + * If your platform has 8-bit width support but is not a v3 controller, + * or if it requires special setup code, you should implement that in + * platform_8bit_width(). 
+ */ + if (host->ops->platform_8bit_width) + host->ops->platform_8bit_width(host, ios->bus_width); + else { + ctrl = sdhci_readb(host, SDHCI_HOST_CONTROL); + if (ios->bus_width == MMC_BUS_WIDTH_8) { + ctrl &= ~SDHCI_CTRL_4BITBUS; + if (host->version >= SDHCI_SPEC_300) + ctrl |= SDHCI_CTRL_8BITBUS; + } else { + if (host->version >= SDHCI_SPEC_300) + ctrl &= ~SDHCI_CTRL_8BITBUS; + if (ios->bus_width == MMC_BUS_WIDTH_4) + ctrl |= SDHCI_CTRL_4BITBUS; + else + ctrl &= ~SDHCI_CTRL_4BITBUS; + } + sdhci_writeb(host, ctrl, SDHCI_HOST_CONTROL); + } - if (ios->bus_width == MMC_BUS_WIDTH_4) - ctrl |= SDHCI_CTRL_4BITBUS; - else - ctrl &= ~SDHCI_CTRL_4BITBUS; + ctrl = sdhci_readb(host, SDHCI_HOST_CONTROL); if ((ios->timing == MMC_TIMING_SD_HS || ios->timing == MMC_TIMING_MMC_HS) @@ -1855,11 +1869,19 @@ int sdhci_add_host(struct sdhci_host *host) mmc->f_min = host->max_clk / SDHCI_MAX_DIV_SPEC_300; else mmc->f_min = host->max_clk / SDHCI_MAX_DIV_SPEC_200; + mmc->f_max = host->max_clk; mmc->caps |= MMC_CAP_SDIO_IRQ; + /* + * A controller may support 8-bit width, but the board itself + * might not have the pins brought out. Boards that support + * 8-bit width must set "mmc->caps |= MMC_CAP_8_BIT_DATA;" in + * their platform code before calling sdhci_add_host(), and we + * won't assume 8-bit width for hosts without that CAP. + */ if (!(host->quirks & SDHCI_QUIRK_FORCE_1_BIT_DATA)) - mmc->caps |= MMC_CAP_4_BIT_DATA | MMC_CAP_8_BIT_DATA; + mmc->caps |= MMC_CAP_4_BIT_DATA; if (caps & SDHCI_CAN_DO_HISPD) mmc->caps |= MMC_CAP_SD_HIGHSPEED | MMC_CAP_MMC_HIGHSPEED; diff --git a/drivers/mmc/host/sdhci.h b/drivers/mmc/host/sdhci.h index d52a7163b97a..e42d7f00c060 100644 --- a/drivers/mmc/host/sdhci.h +++ b/drivers/mmc/host/sdhci.h @@ -76,7 +76,7 @@ #define SDHCI_CTRL_ADMA1 0x08 #define SDHCI_CTRL_ADMA32 0x10 #define SDHCI_CTRL_ADMA64 0x18 -#define SDHCI_CTRL_8BITBUS 0x20 +#define SDHCI_CTRL_8BITBUS 0x20 #define SDHCI_POWER_CONTROL 0x29 #define SDHCI_POWER_ON 0x01 @@ -155,6 +155,7 @@ #define SDHCI_CLOCK_BASE_SHIFT 8 #define SDHCI_MAX_BLOCK_MASK 0x00030000 #define SDHCI_MAX_BLOCK_SHIFT 16 +#define SDHCI_CAN_DO_8BIT 0x00040000 #define SDHCI_CAN_DO_ADMA2 0x00080000 #define SDHCI_CAN_DO_ADMA1 0x00100000 #define SDHCI_CAN_DO_HISPD 0x00200000 @@ -215,6 +216,8 @@ struct sdhci_ops { unsigned int (*get_max_clock)(struct sdhci_host *host); unsigned int (*get_min_clock)(struct sdhci_host *host); unsigned int (*get_timeout_clock)(struct sdhci_host *host); + int (*platform_8bit_width)(struct sdhci_host *host, + int width); void (*platform_send_init_74_clocks)(struct sdhci_host *host, u8 power_mode); unsigned int (*get_ro)(struct sdhci_host *host); -- cgit v1.2.3 From d47844a014fada1a788719f6426bc7044f2a0fd8 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Sat, 20 Nov 2010 03:08:47 +0100 Subject: ath9k: fix timeout on stopping rx dma It seems that using ath9k_hw_stoppcurecv to stop rx dma is not enough. When it's time to stop DMA, the PCU is still busy, so the rx enable bit never clears. Using ath9k_hw_abortpcurecv helps with getting rx stopped much faster, with this change, I cannot reproduce the rx stop related WARN_ON anymore. Signed-off-by: Felix Fietkau Cc: stable@kernel.org Signed-off-by: John W. 
Linville --- drivers/net/wireless/ath/ath9k/recv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath9k/recv.c b/drivers/net/wireless/ath/ath9k/recv.c index c76ea53c20ce..1a62e351ec77 100644 --- a/drivers/net/wireless/ath/ath9k/recv.c +++ b/drivers/net/wireless/ath/ath9k/recv.c @@ -518,7 +518,7 @@ bool ath_stoprecv(struct ath_softc *sc) bool stopped; spin_lock_bh(&sc->rx.rxbuflock); - ath9k_hw_stoppcurecv(ah); + ath9k_hw_abortpcurecv(ah); ath9k_hw_setrxfilter(ah, 0); stopped = ath9k_hw_stopdmarecv(ah); -- cgit v1.2.3 From 1d8638d4038eb8709edc80e37a0bbb77253d86e9 Mon Sep 17 00:00:00 2001 From: Daniel Klaffenbach Date: Fri, 19 Nov 2010 21:25:21 -0600 Subject: ssb: b43-pci-bridge: Add new vendor for BCM4318 Add new vendor for Broadcom 4318. Signed-off-by: Daniel Klaffenbach Signed-off-by: Larry Finger Cc: Stable Signed-off-by: John W. Linville --- drivers/ssb/b43_pci_bridge.c | 1 + include/linux/pci_ids.h | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/ssb/b43_pci_bridge.c b/drivers/ssb/b43_pci_bridge.c index ef9c6a04ad8f..744d3f6e4709 100644 --- a/drivers/ssb/b43_pci_bridge.c +++ b/drivers/ssb/b43_pci_bridge.c @@ -24,6 +24,7 @@ static const struct pci_device_id b43_pci_bridge_tbl[] = { { PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, 0x4312) }, { PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, 0x4315) }, { PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, 0x4318) }, + { PCI_DEVICE(PCI_VENDOR_ID_BCM_GVC, 0x4318) }, { PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, 0x4319) }, { PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, 0x4320) }, { PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, 0x4321) }, diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index d278dd9cb765..f29c25ede70d 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2043,6 +2043,7 @@ #define PCI_DEVICE_ID_AFAVLAB_P030 0x2182 #define PCI_SUBDEVICE_ID_AFAVLAB_P061 0x2150 +#define PCI_VENDOR_ID_BCM_GVC 0x14a4 #define PCI_VENDOR_ID_BROADCOM 0x14e4 #define PCI_DEVICE_ID_TIGON3_5752 0x1600 #define PCI_DEVICE_ID_TIGON3_5752M 0x1601 -- cgit v1.2.3 From b397492a8c1022887a9b2fb925fe92e69ce0ad4d Mon Sep 17 00:00:00 2001 From: Christian Lamparter Date: Sat, 20 Nov 2010 13:15:27 +0100 Subject: carl9170: fix virtual interface setup crash This patch fixes a faulty bounds check which caused a crash when too many virtual interfaces were brought up. BUG: unable to handle kernel NULL pointer dereference at 00000004 IP: [] carl9170_op_add_interface+0x1d7/0x2c0 [carl9170] *pde = 00000000 Oops: 0002 [#1] PREEMPT Modules linked in: carl9170 [...] Pid: 4720, comm: wpa_supplicant Not tainted 2.6.37-rc2-wl+ EIP: 0060:[] EFLAGS: 00210206 CPU: 0 EIP is at carl9170_op_add_interface+0x1d7/0x2c0 [carl9170] EAX: 00000000 ... Process wpa_supplicant Stack: f4f88f34 fffffff4 .. Call Trace: [] ? ieee80211_do_open+0x406/0x5c0 [mac80211] [...] Code: <89> 42 04 ... EIP: [] carl9170_op_add_interface+0x1d7/0x2c0 [carl9170] CR2: 0000000000000004 Signed-off-by: Christian Lamparter Signed-off-by: John W.
Linville --- drivers/net/wireless/ath/carl9170/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/carl9170/main.c b/drivers/net/wireless/ath/carl9170/main.c index 980ae70ea424..a314c2c2bfbe 100644 --- a/drivers/net/wireless/ath/carl9170/main.c +++ b/drivers/net/wireless/ath/carl9170/main.c @@ -647,7 +647,7 @@ init: } unlock: - if (err && (vif_id != -1)) { + if (err && (vif_id >= 0)) { vif_priv->active = false; bitmap_release_region(&ar->vif_bitmap, vif_id, 0); ar->vifs--; -- cgit v1.2.3 From c48a2916355337895aea33839d39810eb575e775 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Fri, 19 Nov 2010 09:39:11 -0800 Subject: hwmon: (i5k_amb) Fix compile warning This patch fixes the following compile warning. drivers/hwmon/i5k_amb.c:500: warning: 'i5k_amb_ids' defined but not used The warning is seen if the driver is built into the kernel (not as module). Signed-off-by: Guenter Roeck Acked-by: Jean Delvare --- drivers/hwmon/i5k_amb.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/hwmon/i5k_amb.c b/drivers/hwmon/i5k_amb.c index 937983407e2a..c4c40be0edbf 100644 --- a/drivers/hwmon/i5k_amb.c +++ b/drivers/hwmon/i5k_amb.c @@ -497,12 +497,14 @@ static unsigned long chipset_ids[] = { 0 }; +#ifdef MODULE static struct pci_device_id i5k_amb_ids[] __devinitdata = { { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_5000_ERR) }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_5400_ERR) }, { 0, } }; MODULE_DEVICE_TABLE(pci, i5k_amb_ids); +#endif static int __devinit i5k_amb_probe(struct platform_device *pdev) { -- cgit v1.2.3 From 32292f49f9683622ad0fb95a8c754910121e7b16 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Thu, 18 Nov 2010 21:06:45 -0800 Subject: hwmon: (lis3lv02d_i2c) Fix compile warnings This commit fixes the following compile warnings. From v2.6.37-rc2/m68k/m68k-allmodconfig, v2.6.37-rc2/powerpc/powerpc-randconfig: drivers/hwmon/lis3lv02d_i2c.c:222: warning: 'lis3_i2c_runtime_suspend' defined but not used drivers/hwmon/lis3lv02d_i2c.c:231: warning: 'lis3_i2c_runtime_resume' defined but not used Seen if CONFIG_PM_RUNTIME is not set. From v2.6.37-rc2/sh4/sh-allyesconfig: drivers/hwmon/lis3lv02d_i2c.c:191: warning: 'lis3lv02d_i2c_suspend' defined but not used drivers/hwmon/lis3lv02d_i2c.c:201: warning: 'lis3lv02d_i2c_resume' defined but not used Seen if CONFIG_PM is set but CONFIG_PM_SLEEP is not. 
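A minimal sketch of the guard pattern this fix applies (illustrative driver code, not from the patch; the function names and empty bodies are assumptions, and in a real driver these callbacks would be referenced from the driver's dev_pm_ops under the same guards):

#include <linux/device.h>

#ifdef CONFIG_PM_SLEEP
static int example_suspend(struct device *dev)
{
	/* Power the device down before system sleep. */
	return 0;
}

static int example_resume(struct device *dev)
{
	/* Restore device state on wakeup. */
	return 0;
}
#endif /* CONFIG_PM_SLEEP */

#ifdef CONFIG_PM_RUNTIME
static int example_runtime_suspend(struct device *dev)
{
	/* Opportunistically power down while the device is idle. */
	return 0;
}
#endif /* CONFIG_PM_RUNTIME */

Compiling each callback only under the configuration that actually wires it up is exactly what silences the "defined but not used" warnings.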
Signed-off-by: Guenter Roeck Acked-by: Jean Delvare --- drivers/hwmon/lis3lv02d_i2c.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/hwmon/lis3lv02d_i2c.c b/drivers/hwmon/lis3lv02d_i2c.c index 9f4bae07f719..8853afce85ce 100644 --- a/drivers/hwmon/lis3lv02d_i2c.c +++ b/drivers/hwmon/lis3lv02d_i2c.c @@ -186,7 +186,7 @@ static int __devexit lis3lv02d_i2c_remove(struct i2c_client *client) return 0; } -#ifdef CONFIG_PM +#ifdef CONFIG_PM_SLEEP static int lis3lv02d_i2c_suspend(struct device *dev) { struct i2c_client *client = container_of(dev, struct i2c_client, dev); @@ -213,12 +213,9 @@ static int lis3lv02d_i2c_resume(struct device *dev) return 0; } -#else -#define lis3lv02d_i2c_suspend NULL -#define lis3lv02d_i2c_resume NULL -#define lis3lv02d_i2c_shutdown NULL -#endif +#endif /* CONFIG_PM_SLEEP */ +#ifdef CONFIG_PM_RUNTIME static int lis3_i2c_runtime_suspend(struct device *dev) { struct i2c_client *client = container_of(dev, struct i2c_client, dev); @@ -236,6 +233,7 @@ static int lis3_i2c_runtime_resume(struct device *dev) lis3lv02d_poweron(lis3); return 0; } +#endif /* CONFIG_PM_RUNTIME */ static const struct i2c_device_id lis3lv02d_id[] = { {"lis3lv02d", 0 }, -- cgit v1.2.3 From b3915d1fb6557dda206f4644ba9aa96ffd9a99d2 Mon Sep 17 00:00:00 2001 From: Vasiliy Kulikov Date: Mon, 22 Nov 2010 18:59:13 +0300 Subject: ASoC: atmel: test wrong variable After clk_get(), mclk is checked a second time instead of pllb. In patch v1, Jarkko Nikula noticed that PTR_ERR() also has the wrong argument. Signed-off-by: Vasiliy Kulikov Acked-by: Liam Girdwood Signed-off-by: Mark Brown --- sound/soc/atmel/sam9g20_wm8731.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/atmel/sam9g20_wm8731.c b/sound/soc/atmel/sam9g20_wm8731.c index 293569dfd0ed..032e17dd8fdb 100644 --- a/sound/soc/atmel/sam9g20_wm8731.c +++ b/sound/soc/atmel/sam9g20_wm8731.c @@ -222,9 +222,9 @@ static int __init at91sam9g20ek_init(void) } pllb = clk_get(NULL, "pllb"); - if (IS_ERR(mclk)) { + if (IS_ERR(pllb)) { printk(KERN_ERR "ASoC: Failed to get PLLB\n"); - ret = PTR_ERR(mclk); + ret = PTR_ERR(pllb); goto err_mclk; } ret = clk_set_parent(mclk, pllb); -- cgit v1.2.3 From f71a4734b1ad7edccbfd9bd395df328ebbd94287 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 22 Nov 2010 19:11:48 +0000 Subject: ASoC: Fix multi-component mismerge in WM8523 Signed-off-by: Mark Brown Acked-by: Liam Girdwood --- sound/soc/codecs/wm8523.c | 1 - 1 file changed, 1 deletion(-) diff --git a/sound/soc/codecs/wm8523.c b/sound/soc/codecs/wm8523.c index 712ef7c76f90..9a433a5396cb 100644 --- a/sound/soc/codecs/wm8523.c +++ b/sound/soc/codecs/wm8523.c @@ -146,7 +146,6 @@ static int wm8523_startup(struct snd_pcm_substream *substream, return -EINVAL; } - return 0; snd_pcm_hw_constraint_list(substream->runtime, 0, SNDRV_PCM_HW_PARAM_RATE, &wm8523->rate_constraint); -- cgit v1.2.3 From eba19fdd818dfec3782ff095591e51c9bd617403 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Fri, 19 Nov 2010 16:09:15 +0000 Subject: ASoC: Restore WM8994 volatile and readable register operations They went AWOL during the multi-component merge.
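For context, a rough sketch of what such per-register predicates enable in a caching register I/O layer (a generic illustration under stated assumptions: the struct, field names, and cache size here are invented and this is not the ASoC core code):

struct my_codec {
	unsigned int (*hw_read)(unsigned int reg);	/* bus access */
	int (*volatile_register)(unsigned int reg);	/* predicate */
	unsigned int reg_cache[256];			/* shadow cache */
};

static unsigned int cached_read(struct my_codec *c, unsigned int reg)
{
	/* Volatile registers (status bits, level meters) change underneath
	 * us, so they must always be read from the hardware itself. */
	if (c->volatile_register && c->volatile_register(reg))
		return c->hw_read(reg);

	/* Everything else is safe to serve from the shadow cache. */
	return c->reg_cache[reg];
}

With the predicates missing, every register is treated uniformly and reads of volatile status registers can return stale cached values.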
Signed-off-by: Mark Brown Acked-by: Liam Girdwood --- sound/soc/codecs/wm8994.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/soc/codecs/wm8994.c b/sound/soc/codecs/wm8994.c index 830dfdd66c5f..ea3ee9fde2b1 100644 --- a/sound/soc/codecs/wm8994.c +++ b/sound/soc/codecs/wm8994.c @@ -4073,6 +4073,8 @@ static struct snd_soc_codec_driver soc_codec_dev_wm8994 = { .resume = wm8994_resume, .read = wm8994_read, .write = wm8994_write, + .readable_register = wm8994_readable, + .volatile_register = wm8994_volatile, .set_bias_level = wm8994_set_bias_level, }; -- cgit v1.2.3 From 103cfcf522cefe00d8c322c6beac9a711acbf235 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 23 Nov 2010 09:26:02 +0300 Subject: nilfs2: nilfs_iget_for_gc() returns ERR_PTR nilfs_iget_for_gc() returns an ERR_PTR() on failure and doesn't return NULL. Signed-off-by: Dan Carpenter Signed-off-by: Ryusuke Konishi --- fs/nilfs2/ioctl.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c index 3e90f86d5bfe..e00d9457c256 100644 --- a/fs/nilfs2/ioctl.c +++ b/fs/nilfs2/ioctl.c @@ -349,8 +349,8 @@ static int nilfs_ioctl_move_blocks(struct super_block *sb, ino = vdesc->vd_ino; cno = vdesc->vd_cno; inode = nilfs_iget_for_gc(sb, ino, cno); - if (unlikely(inode == NULL)) { - ret = -ENOMEM; + if (IS_ERR(inode)) { + ret = PTR_ERR(inode); goto failed; } do { -- cgit v1.2.3 From 92a5288501685bf05fc348ee2a3115a9bd9ae36f Mon Sep 17 00:00:00 2001 From: Jesper Juhl Date: Mon, 22 Nov 2010 22:54:03 +0100 Subject: ASoC: MPC5200: Eliminate duplicate include of of_device.h Eliminate duplicate #include from sound/soc/fsl/mpc5200_dma.c Signed-off-by: Jesper Juhl Acked-by: Liam Girdwood Signed-off-by: Mark Brown --- sound/soc/fsl/mpc5200_dma.c | 1 - 1 file changed, 1 deletion(-) diff --git a/sound/soc/fsl/mpc5200_dma.c b/sound/soc/fsl/mpc5200_dma.c index dce6b551cd78..f92dca07cd35 100644 --- a/sound/soc/fsl/mpc5200_dma.c +++ b/sound/soc/fsl/mpc5200_dma.c @@ -9,7 +9,6 @@ #include #include #include -#include #include #include -- cgit v1.2.3 From 7a479b02843c8d78ef51a64d1168592258440c97 Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Tue, 23 Nov 2010 14:14:07 +0800 Subject: ASoC: Do not update the cache if write to hardware failed Signed-off-by: Axel Lin Acked-by: Peter Ujfalusi Acked-by: Liam Girdwood Signed-off-by: Mark Brown --- sound/soc/codecs/tpa6130a2.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sound/soc/codecs/tpa6130a2.c b/sound/soc/codecs/tpa6130a2.c index ee4fb201de60..d2c243095673 100644 --- a/sound/soc/codecs/tpa6130a2.c +++ b/sound/soc/codecs/tpa6130a2.c @@ -78,8 +78,10 @@ static int tpa6130a2_i2c_write(int reg, u8 value) if (data->power_state) { val = i2c_smbus_write_byte_data(tpa6130a2_client, reg, value); - if (val < 0) + if (val < 0) { dev_err(&tpa6130a2_client->dev, "Write failed\n"); + return val; + } } /* Either powered on or off, we save the context */ -- cgit v1.2.3 From bc5954f00e80c55140f546c80f34a8660bdd2c5f Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Tue, 23 Nov 2010 15:56:21 +0800 Subject: ASoC: max98088 - fix a memory leak Signed-off-by: Axel Lin Acked-by: Liam Girdwood Signed-off-by: Mark Brown --- sound/soc/codecs/max98088.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sound/soc/codecs/max98088.c b/sound/soc/codecs/max98088.c index 470cb93b1d1f..d63e28773eb1 100644 --- a/sound/soc/codecs/max98088.c +++ b/sound/soc/codecs/max98088.c @@ -2019,7 +2019,10 @@ err_access: static int max98088_remove(struct snd_soc_codec *codec) { + 
struct max98088_priv *max98088 = snd_soc_codec_get_drvdata(codec); + max98088_set_bias_level(codec, SND_SOC_BIAS_OFF); + kfree(max98088->eq_texts); return 0; } -- cgit v1.2.3 From cd70978cb59fd20dccdfc790ea8bb308c2dfd1d6 Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Tue, 23 Nov 2010 15:57:49 +0800 Subject: ASoC: wm8904 - fix memory leaks Signed-off-by: Axel Lin Acked-by: Liam Girdwood Signed-off-by: Mark Brown --- sound/soc/codecs/wm8904.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/soc/codecs/wm8904.c b/sound/soc/codecs/wm8904.c index 33be84e506ea..fca60a0b57b8 100644 --- a/sound/soc/codecs/wm8904.c +++ b/sound/soc/codecs/wm8904.c @@ -2498,6 +2498,8 @@ static int wm8904_remove(struct snd_soc_codec *codec) wm8904_set_bias_level(codec, SND_SOC_BIAS_OFF); regulator_bulk_free(ARRAY_SIZE(wm8904->supplies), wm8904->supplies); + kfree(wm8904->retune_mobile_texts); + kfree(wm8904->drc_texts); return 0; } -- cgit v1.2.3 From 24fb2b1174ddc1f844e2008eb5b3105832860395 Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Tue, 23 Nov 2010 15:58:39 +0800 Subject: ASoC: wm8994 - fix memory leaks Signed-off-by: Axel Lin Acked-by: Liam Girdwood Signed-off-by: Mark Brown --- sound/soc/codecs/wm8994.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/soc/codecs/wm8994.c b/sound/soc/codecs/wm8994.c index ea3ee9fde2b1..4d3e6f1ac584 100644 --- a/sound/soc/codecs/wm8994.c +++ b/sound/soc/codecs/wm8994.c @@ -4061,6 +4061,8 @@ static int wm8994_codec_remove(struct snd_soc_codec *codec) wm8994_free_irq(codec->control_data, WM8994_IRQ_MIC2_DET, wm8994); wm8994_free_irq(codec->control_data, WM8994_IRQ_MIC1_SHRT, wm8994); wm8994_free_irq(codec->control_data, WM8994_IRQ_MIC1_DET, wm8994); + kfree(wm8994->retune_mobile_texts); + kfree(wm8994->drc_texts); kfree(wm8994); return 0; -- cgit v1.2.3 From 02a9d03772aa1ff33a26180a2da0bfb191240eda Mon Sep 17 00:00:00 2001 From: Rabin Vincent Date: Tue, 23 Nov 2010 22:08:18 +0530 Subject: perf symbols: Remove incorrect open-coded container_of() At least on ARM, padding is inserted between rb_node and sym in struct symbol_name_rb_node, causing "((void *)sym) - sizeof(struct rb_node)" to point inside rb_node rather than to the symbol_name_rb_node. Fix this by converting the code to use container_of(). Cc: Ian Munsie Cc: Ingo Molnar Cc: Ming Lei Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Tom Zanussi LKML-Reference: <20101123163106.GA25677@debian> Signed-off-by: Rabin Vincent Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/symbol.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index b39f499e575a..0500895a45af 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -295,7 +295,9 @@ static void symbols__insert_by_name(struct rb_root *self, struct symbol *sym) { struct rb_node **p = &self->rb_node; struct rb_node *parent = NULL; - struct symbol_name_rb_node *symn = ((void *)sym) - sizeof(*parent), *s; + struct symbol_name_rb_node *symn, *s; + + symn = container_of(sym, struct symbol_name_rb_node, sym); while (*p != NULL) { parent = *p; -- cgit v1.2.3 From e476a5a41ad67d0e2b4a652820c49a3923eb936b Mon Sep 17 00:00:00 2001 From: Guennadi Liakhovetski Date: Tue, 23 Nov 2010 17:10:24 +0100 Subject: wireless: b43: fix error path in SDIO Fix unbalanced call to sdio_release_host() on the error path. Signed-off-by: Guennadi Liakhovetski Acked-by: Larry Finger Cc: stable@kernel.org Signed-off-by: John W. 
Linville --- drivers/net/wireless/b43/sdio.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/b43/sdio.c b/drivers/net/wireless/b43/sdio.c index 9a55338d957f..09e2dfd7b175 100644 --- a/drivers/net/wireless/b43/sdio.c +++ b/drivers/net/wireless/b43/sdio.c @@ -163,6 +163,7 @@ static int b43_sdio_probe(struct sdio_func *func, err_free_ssb: kfree(sdio); err_disable_func: + sdio_claim_host(func); sdio_disable_func(func); err_release_host: sdio_release_host(func); -- cgit v1.2.3 From f6c26ec5085be805c9dc72d074ef5f504b9cd7df Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Wed, 24 Nov 2010 02:18:59 +0900 Subject: nilfs2: fix typo in comment of nilfs_dat_move function Fixes a typo: "uncommited" -> "uncommitted". Signed-off-by: Ryusuke Konishi --- fs/nilfs2/dat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nilfs2/dat.c b/fs/nilfs2/dat.c index 49c844dab33a..59e5fe742f7b 100644 --- a/fs/nilfs2/dat.c +++ b/fs/nilfs2/dat.c @@ -335,7 +335,7 @@ int nilfs_dat_move(struct inode *dat, __u64 vblocknr, sector_t blocknr) * the device at this point. * * To prevent nilfs_dat_translate() from returning the - * uncommited block number, this makes a copy of the entry + * uncommitted block number, this makes a copy of the entry * buffer and redirects nilfs_dat_translate() to the copy. */ if (!buffer_nilfs_redirected(entry_bh)) { -- cgit v1.2.3 From d94772070acc5a8f312ab4650cbbf5e78ea9dda2 Mon Sep 17 00:00:00 2001 From: Denis Kuplyakov Date: Wed, 24 Nov 2010 06:01:09 +0100 Subject: ALSA: hda - Fix Acer 7730G support Fixes automatic EAPD configuration on Acer 7730G laptop. Signed-off-by: Denis Kuplyakov Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 49 ++++++++++++++++++++++++++++++++++++------- 1 file changed, 41 insertions(+), 8 deletions(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 0ac6aed0c889..8f7530fc7644 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -2013,6 +2013,36 @@ static struct hda_verb alc888_acer_aspire_6530g_verbs[] = { { } }; +/* + *ALC888 Acer Aspire 7730G model + */ + +static struct hda_verb alc888_acer_aspire_7730G_verbs[] = { +/* Bias voltage on for external mic port */ + {0x18, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_IN | PIN_VREF80}, +/* Front Mic: set to PIN_IN (empty by default) */ + {0x12, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_IN}, +/* Unselect Front Mic by default in input mixer 3 */ + {0x22, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(0xb)}, +/* Enable unsolicited event for HP jack */ + {0x15, AC_VERB_SET_UNSOLICITED_ENABLE, ALC880_HP_EVENT | AC_USRSP_EN}, +/* Enable speaker output */ + {0x14, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT}, + {0x14, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE}, + {0x14, AC_VERB_SET_EAPD_BTLENABLE, 2}, +/* Enable headphone output */ + {0x15, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT | PIN_HP}, + {0x15, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE}, + {0x15, AC_VERB_SET_CONNECT_SEL, 0x00}, + {0x15, AC_VERB_SET_EAPD_BTLENABLE, 2}, +/*Enable internal subwoofer */ + {0x17, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT}, + {0x17, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE}, + {0x17, AC_VERB_SET_CONNECT_SEL, 0x02}, + {0x17, AC_VERB_SET_EAPD_BTLENABLE, 2}, + { } +}; + /* * ALC889 Acer Aspire 8930G model */ @@ -2200,6 +2230,16 @@ static void alc888_acer_aspire_6530g_setup(struct hda_codec *codec) spec->autocfg.speaker_pins[2] = 0x17; } +static void alc888_acer_aspire_7730g_setup(struct hda_codec *codec) +{ + struct alc_spec *spec = codec->spec; + + 
spec->autocfg.hp_pins[0] = 0x15; + spec->autocfg.speaker_pins[0] = 0x14; + spec->autocfg.speaker_pins[1] = 0x16; + spec->autocfg.speaker_pins[2] = 0x17; +} + static void alc889_acer_aspire_8930g_setup(struct hda_codec *codec) { struct alc_spec *spec = codec->spec; @@ -9524,13 +9564,6 @@ static struct hda_verb alc883_acer_eapd_verbs[] = { { } }; -static struct hda_verb alc888_acer_aspire_7730G_verbs[] = { - {0x15, AC_VERB_SET_CONNECT_SEL, 0x00}, - {0x17, AC_VERB_SET_CONNECT_SEL, 0x02}, - {0x15, AC_VERB_SET_UNSOLICITED_ENABLE, ALC880_HP_EVENT | AC_USRSP_EN}, - { } /* end */ -}; - static void alc888_6st_dell_setup(struct hda_codec *codec) { struct alc_spec *spec = codec->spec; @@ -10328,7 +10361,7 @@ static struct alc_config_preset alc882_presets[] = { .const_channel_count = 6, .input_mux = &alc883_capture_source, .unsol_event = alc_automute_amp_unsol_event, - .setup = alc888_acer_aspire_6530g_setup, + .setup = alc888_acer_aspire_7730g_setup, .init_hook = alc_automute_amp, }, [ALC883_MEDION] = { -- cgit v1.2.3 From e8ee13a818db4954517cea7da6e7c15b9656eb00 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Fri, 19 Nov 2010 07:22:58 +0000 Subject: ARM: mach-shmobile: clock-sh7372: remove fsidiv bogus disable The current FSIDIV clock framework had a bogus disable. This patch removes it. Signed-off-by: Kuninori Morimoto Signed-off-by: Paul Mundt --- arch/arm/mach-shmobile/clock-sh7372.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/arch/arm/mach-shmobile/clock-sh7372.c b/arch/arm/mach-shmobile/clock-sh7372.c index 7db31e6c6bf2..13226323e4e0 100644 --- a/arch/arm/mach-shmobile/clock-sh7372.c +++ b/arch/arm/mach-shmobile/clock-sh7372.c @@ -453,10 +453,8 @@ static int fsidiv_enable(struct clk *clk) unsigned long value; value = __raw_readl(clk->mapping->base) >> 16; - if (value < 2) { - fsidiv_disable(clk); + if (value < 2) return -ENOENT; - } __raw_writel((value << 16) | 0x3, clk->mapping->base); @@ -468,11 +466,6 @@ static int fsidiv_set_rate(struct clk *clk, { int idx; - if (clk->parent->rate == rate) { - fsidiv_disable(clk); - return 0; - } - idx = (clk->parent->rate / rate) & 0xffff; if (idx < 2) return -ENOENT; -- cgit v1.2.3 From d4bc99b977e3a1dd10a84a01ebe59ac2ccebf0cd Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Wed, 24 Nov 2010 02:44:06 +0000 Subject: ARM: mach-shmobile: ap4evb: FSI clock use proper process for HDMI The current AP4 FSI set_rate function used a bogus clock process which did not take care of enable/disable pairing and clk->usecount. To solve this issue, this patch fixes it, and also modifies the FSI driver to call set_rate with enough options.
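The core of the fix is keeping clk_enable()/clk_disable() balanced so that clk->usecount stays correct; a minimal sketch of the pattern using the common clk API (the helper name is illustrative, mirroring the __fsi_set_rate() added in the diff below):

#include <linux/clk.h>

static int set_rate_balanced(struct clk *clk, long rate, int enable)
{
	int ret = 0;

	if (enable) {
		/* Round first so we never request an unsupported rate. */
		ret = clk_set_rate(clk, clk_round_rate(clk, rate));
		if (ret == 0)
			ret = clk_enable(clk);	/* increments usecount */
	} else {
		clk_disable(clk);		/* decrements usecount */
	}

	return ret;
}

Each successful enable is later paired with exactly one disable (here, from the shutdown path) instead of toggling the clock behind the framework's back.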
Signed-off-by: Kuninori Morimoto Signed-off-by: Paul Mundt --- arch/arm/mach-shmobile/board-ap4evb.c | 58 +++++++++++++++++++++++++++-------- arch/arm/mach-shmobile/clock-sh7372.c | 2 +- include/sound/sh_fsi.h | 6 ++-- sound/soc/sh/fsi.c | 19 ++++++++++-- 4 files changed, 67 insertions(+), 18 deletions(-) diff --git a/arch/arm/mach-shmobile/board-ap4evb.c b/arch/arm/mach-shmobile/board-ap4evb.c index d3260542b943..61c1068198ec 100644 --- a/arch/arm/mach-shmobile/board-ap4evb.c +++ b/arch/arm/mach-shmobile/board-ap4evb.c @@ -567,40 +567,72 @@ static struct platform_device *qhd_devices[] __initdata = { /* FSI */ #define IRQ_FSI evt2irq(0x1840) +static int __fsi_set_rate(struct clk *clk, long rate, int enable) +{ + int ret = 0; + + if (rate <= 0) + return ret; -static int fsi_set_rate(int is_porta, int rate) + if (enable) { + ret = clk_set_rate(clk, clk_round_rate(clk, rate)); + if (0 == ret) + ret = clk_enable(clk); + } else { + clk_disable(clk); + } + + return ret; +} + +static int fsi_set_rate(struct device *dev, int is_porta, int rate, int enable) { struct clk *fsib_clk; struct clk *fdiv_clk = &sh7372_fsidivb_clk; + long fsib_rate = 0; + long fdiv_rate = 0; + int ackmd_bpfmd; int ret; /* set_rate is not needed if port A */ if (is_porta) return 0; - fsib_clk = clk_get(NULL, "fsib_clk"); - if (IS_ERR(fsib_clk)) - return -EINVAL; - switch (rate) { case 44100: - clk_set_rate(fsib_clk, clk_round_rate(fsib_clk, 11283000)); - ret = SH_FSI_ACKMD_256 | SH_FSI_BPFMD_64; + fsib_rate = rate * 256; + ackmd_bpfmd = SH_FSI_ACKMD_256 | SH_FSI_BPFMD_64; break; case 48000: - clk_set_rate(fsib_clk, clk_round_rate(fsib_clk, 85428000)); - clk_set_rate(fdiv_clk, clk_round_rate(fdiv_clk, 12204000)); - ret = SH_FSI_ACKMD_256 | SH_FSI_BPFMD_64; + fsib_rate = 85428000; /* around 48kHz x 256 x 7 */ + fdiv_rate = rate * 256; + ackmd_bpfmd = SH_FSI_ACKMD_256 | SH_FSI_BPFMD_64; break; default: pr_err("unsupported rate in FSI2 port B\n"); - ret = -EINVAL; - break; + return -EINVAL; } + /* FSI B setting */ + fsib_clk = clk_get(dev, "ickb"); + if (IS_ERR(fsib_clk)) + return -EIO; + + ret = __fsi_set_rate(fsib_clk, fsib_rate, enable); clk_put(fsib_clk); + if (ret < 0) + return ret; - return ret; + /* FSI DIV setting */ + ret = __fsi_set_rate(fdiv_clk, fdiv_rate, enable); + if (ret < 0) { + /* disable FSI B */ + if (enable) + __fsi_set_rate(fsib_clk, fsib_rate, 0); + return ret; + } + + return ackmd_bpfmd; } static struct sh_fsi_platform_info fsi_info = { diff --git a/arch/arm/mach-shmobile/clock-sh7372.c b/arch/arm/mach-shmobile/clock-sh7372.c index 13226323e4e0..4191e2921127 100644 --- a/arch/arm/mach-shmobile/clock-sh7372.c +++ b/arch/arm/mach-shmobile/clock-sh7372.c @@ -471,7 +471,7 @@ static int fsidiv_set_rate(struct clk *clk, return -ENOENT; __raw_writel(idx << 16, clk->mapping->base); - return fsidiv_enable(clk); + return 0; } static struct clk_ops fsidiv_clk_ops = { diff --git a/include/sound/sh_fsi.h b/include/sound/sh_fsi.h index fa60cbda90a4..d79894192ae3 100644 --- a/include/sound/sh_fsi.h +++ b/include/sound/sh_fsi.h @@ -85,7 +85,9 @@ * ACK_MD (FSI2) * CKG1 (FSI) * - * err: return value < 0 + * err : return value < 0 + * no change : return value == 0 + * change xMD : return value > 0 * * 0x-00000AB * @@ -111,7 +113,7 @@ struct sh_fsi_platform_info { unsigned long porta_flags; unsigned long portb_flags; - int (*set_rate)(int is_porta, int rate); /* for master mode */ + int (*set_rate)(struct device *dev, int is_porta, int rate, int enable); }; #endif /* __SOUND_FSI_H */ diff --git a/sound/soc/sh/fsi.c 
b/sound/soc/sh/fsi.c index 507e709f2807..136414f163e9 100644 --- a/sound/soc/sh/fsi.c +++ b/sound/soc/sh/fsi.c @@ -132,6 +132,8 @@ struct fsi_priv { struct fsi_stream playback; struct fsi_stream capture; + long rate; + u32 mst_ctrl; }; @@ -854,10 +856,17 @@ static void fsi_dai_shutdown(struct snd_pcm_substream *substream, { struct fsi_priv *fsi = fsi_get_priv(substream); int is_play = fsi_is_play(substream); + struct fsi_master *master = fsi_get_master(fsi); + int (*set_rate)(struct device *dev, int is_porta, int rate, int enable); fsi_irq_disable(fsi, is_play); fsi_clk_ctrl(fsi, 0); + set_rate = master->info->set_rate; + if (set_rate && fsi->rate) + set_rate(dai->dev, fsi_is_port_a(fsi), fsi->rate, 0); + fsi->rate = 0; + pm_runtime_put_sync(dai->dev); } @@ -891,9 +900,10 @@ static int fsi_dai_hw_params(struct snd_pcm_substream *substream, { struct fsi_priv *fsi = fsi_get_priv(substream); struct fsi_master *master = fsi_get_master(fsi); - int (*set_rate)(int is_porta, int rate) = master->info->set_rate; + int (*set_rate)(struct device *dev, int is_porta, int rate, int enable); int fsi_ver = master->core->ver; int is_play = fsi_is_play(substream); + long rate = params_rate(params); int ret; /* if slave mode, set_rate is not needed */ @@ -901,10 +911,15 @@ static int fsi_dai_hw_params(struct snd_pcm_substream *substream, return 0; /* it is error if no set_rate */ + set_rate = master->info->set_rate; if (!set_rate) return -EIO; - ret = set_rate(fsi_is_port_a(fsi), params_rate(params)); + ret = set_rate(dai->dev, fsi_is_port_a(fsi), rate, 1); + if (ret < 0) /* error */ + return ret; + + fsi->rate = rate; if (ret > 0) { u32 data = 0; -- cgit v1.2.3 From 22de4e1fe446794acaebdf19dcaff4256d659972 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Fri, 19 Nov 2010 07:23:17 +0000 Subject: ARM: mach-shmobile: ap4evb: FSI clock use proper process for ak4642 Current AP4 FSI didn't use set_rate for ak4642, and used dummy rate when init. And FSI driver was modified to always call set_rate. The user which are using FSI set_rate is only AP4 now. Signed-off-by: Kuninori Morimoto Signed-off-by: Paul Mundt --- arch/arm/mach-shmobile/board-ap4evb.c | 96 ++++++++++++++++++++++++----------- sound/soc/sh/fsi.c | 8 +-- 2 files changed, 68 insertions(+), 36 deletions(-) diff --git a/arch/arm/mach-shmobile/board-ap4evb.c b/arch/arm/mach-shmobile/board-ap4evb.c index 61c1068198ec..e084b423146e 100644 --- a/arch/arm/mach-shmobile/board-ap4evb.c +++ b/arch/arm/mach-shmobile/board-ap4evb.c @@ -575,7 +575,7 @@ static int __fsi_set_rate(struct clk *clk, long rate, int enable) return ret; if (enable) { - ret = clk_set_rate(clk, clk_round_rate(clk, rate)); + ret = clk_set_rate(clk, rate); if (0 == ret) ret = clk_enable(clk); } else { @@ -585,7 +585,56 @@ static int __fsi_set_rate(struct clk *clk, long rate, int enable) return ret; } -static int fsi_set_rate(struct device *dev, int is_porta, int rate, int enable) +static int __fsi_set_round_rate(struct clk *clk, long rate, int enable) +{ + return __fsi_set_rate(clk, clk_round_rate(clk, rate), enable); +} + +static int fsi_ak4642_set_rate(struct device *dev, int rate, int enable) +{ + struct clk *fsia_ick; + struct clk *fsiack; + int ret = -EIO; + + fsia_ick = clk_get(dev, "icka"); + if (IS_ERR(fsia_ick)) + return PTR_ERR(fsia_ick); + + /* + * FSIACK is connected to AK4642, + * and use external clock pin from it. + * it is parent of fsia_ick now. 
+ */ + fsiack = clk_get_parent(fsia_ick); + if (!fsiack) + goto fsia_ick_out; + + /* + * we get 1/1 divided clock by setting same rate to fsiack and fsia_ick + * + ** FIXME ** + * Because the freq_table of external clk (fsiack) are all 0, + * the return value of clk_round_rate became 0. + * So, it use __fsi_set_rate here. + */ + ret = __fsi_set_rate(fsiack, rate, enable); + if (ret < 0) + goto fsiack_out; + + ret = __fsi_set_round_rate(fsia_ick, rate, enable); + if ((ret < 0) && enable) + __fsi_set_round_rate(fsiack, rate, 0); /* disable FSI ACK */ + +fsiack_out: + clk_put(fsiack); + +fsia_ick_out: + clk_put(fsia_ick); + + return 0; +} + +static int fsi_hdmi_set_rate(struct device *dev, int rate, int enable) { struct clk *fsib_clk; struct clk *fdiv_clk = &sh7372_fsidivb_clk; @@ -594,10 +643,6 @@ static int fsi_set_rate(struct device *dev, int is_porta, int rate, int enable) int ackmd_bpfmd; int ret; - /* set_rate is not needed if port A */ - if (is_porta) - return 0; - switch (rate) { case 44100: fsib_rate = rate * 256; @@ -618,23 +663,35 @@ static int fsi_set_rate(struct device *dev, int is_porta, int rate, int enable) if (IS_ERR(fsib_clk)) return -EIO; - ret = __fsi_set_rate(fsib_clk, fsib_rate, enable); + ret = __fsi_set_round_rate(fsib_clk, fsib_rate, enable); clk_put(fsib_clk); if (ret < 0) return ret; /* FSI DIV setting */ - ret = __fsi_set_rate(fdiv_clk, fdiv_rate, enable); + ret = __fsi_set_round_rate(fdiv_clk, fdiv_rate, enable); if (ret < 0) { /* disable FSI B */ if (enable) - __fsi_set_rate(fsib_clk, fsib_rate, 0); + __fsi_set_round_rate(fsib_clk, fsib_rate, 0); return ret; } return ackmd_bpfmd; } +static int fsi_set_rate(struct device *dev, int is_porta, int rate, int enable) +{ + int ret; + + if (is_porta) + ret = fsi_ak4642_set_rate(dev, rate, enable); + else + ret = fsi_hdmi_set_rate(dev, rate, enable); + + return ret; +} + static struct sh_fsi_platform_info fsi_info = { .porta_flags = SH_FSI_BRS_INV | SH_FSI_OUT_SLAVE_MODE | @@ -928,23 +985,11 @@ out: device_initcall(hdmi_init_pm_clock); -#define FSIACK_DUMMY_RATE 48000 static int __init fsi_init_pm_clock(void) { struct clk *fsia_ick; int ret; - /* - * FSIACK is connected to AK4642, - * and the rate is depend on playing sound rate. 
- * So, set dummy rate (= 48k) here - */ - ret = clk_set_rate(&sh7372_fsiack_clk, FSIACK_DUMMY_RATE); - if (ret < 0) { - pr_err("Cannot set FSIACK dummy rate: %d\n", ret); - return ret; - } - fsia_ick = clk_get(&fsi_device.dev, "icka"); if (IS_ERR(fsia_ick)) { ret = PTR_ERR(fsia_ick); @@ -953,16 +998,9 @@ static int __init fsi_init_pm_clock(void) } ret = clk_set_parent(fsia_ick, &sh7372_fsiack_clk); - if (ret < 0) { - pr_err("Cannot set FSI-A parent: %d\n", ret); - goto out; - } - - ret = clk_set_rate(fsia_ick, FSIACK_DUMMY_RATE); if (ret < 0) - pr_err("Cannot set FSI-A rate: %d\n", ret); + pr_err("Cannot set FSI-A parent: %d\n", ret); -out: clk_put(fsia_ick); return ret; diff --git a/sound/soc/sh/fsi.c b/sound/soc/sh/fsi.c index 136414f163e9..4c2404b1b862 100644 --- a/sound/soc/sh/fsi.c +++ b/sound/soc/sh/fsi.c @@ -902,18 +902,12 @@ static int fsi_dai_hw_params(struct snd_pcm_substream *substream, struct fsi_master *master = fsi_get_master(fsi); int (*set_rate)(struct device *dev, int is_porta, int rate, int enable); int fsi_ver = master->core->ver; - int is_play = fsi_is_play(substream); long rate = params_rate(params); int ret; - /* if slave mode, set_rate is not needed */ - if (!fsi_is_master_mode(fsi, is_play)) - return 0; - - /* it is error if no set_rate */ set_rate = master->info->set_rate; if (!set_rate) - return -EIO; + return 0; ret = set_rate(dai->dev, fsi_is_port_a(fsi), rate, 1); if (ret < 0) /* error */ -- cgit v1.2.3 From a57b1a9bdfb61e8d7dc7acc5e2d8bc04c549e668 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Fri, 19 Nov 2010 07:23:26 +0000 Subject: ARM: mach-shmobile: clock-sh7372: modify error code Signed-off-by: Kuninori Morimoto Signed-off-by: Paul Mundt --- arch/arm/mach-shmobile/clock-sh7372.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/mach-shmobile/clock-sh7372.c b/arch/arm/mach-shmobile/clock-sh7372.c index 4191e2921127..41663e0ca104 100644 --- a/arch/arm/mach-shmobile/clock-sh7372.c +++ b/arch/arm/mach-shmobile/clock-sh7372.c @@ -454,7 +454,7 @@ static int fsidiv_enable(struct clk *clk) value = __raw_readl(clk->mapping->base) >> 16; if (value < 2) - return -ENOENT; + return -EIO; __raw_writel((value << 16) | 0x3, clk->mapping->base); @@ -468,7 +468,7 @@ static int fsidiv_set_rate(struct clk *clk, idx = (clk->parent->rate / rate) & 0xffff; if (idx < 2) - return -ENOENT; + return -EINVAL; __raw_writel(idx << 16, clk->mapping->base); return 0; -- cgit v1.2.3 From b16a2892b9852839307894cc429b7a7b145138a7 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Fri, 19 Nov 2010 07:23:32 +0000 Subject: ARM: mach-shmobile: clock-sh7372: remove unnecessary fsi clocks Signed-off-by: Kuninori Morimoto Signed-off-by: Paul Mundt --- arch/arm/mach-shmobile/clock-sh7372.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/arm/mach-shmobile/clock-sh7372.c b/arch/arm/mach-shmobile/clock-sh7372.c index 41663e0ca104..cbeca2849918 100644 --- a/arch/arm/mach-shmobile/clock-sh7372.c +++ b/arch/arm/mach-shmobile/clock-sh7372.c @@ -602,8 +602,6 @@ static struct clk_lookup lookups[] = { CLKDEV_CON_ID("vck3_clk", &div6_clks[DIV6_VCK3]), CLKDEV_CON_ID("fmsi_clk", &div6_clks[DIV6_FMSI]), CLKDEV_CON_ID("fmso_clk", &div6_clks[DIV6_FMSO]), - CLKDEV_CON_ID("fsia_clk", &div6_reparent_clks[DIV6_FSIA]), - CLKDEV_CON_ID("fsib_clk", &div6_reparent_clks[DIV6_FSIB]), CLKDEV_CON_ID("sub_clk", &div6_clks[DIV6_SUB]), CLKDEV_CON_ID("spu_clk", &div6_clks[DIV6_SPU]), CLKDEV_CON_ID("vou_clk", &div6_clks[DIV6_VOU]), -- cgit v1.2.3 From 
421b446abeec55bed1251fab80cb5c12be58b773 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Fri, 19 Nov 2010 07:23:52 +0000 Subject: ARM: mach-shmobile: clock-sh7372: remove bogus pllc2 clock toggling. The PLLC2 clock was utilizing the same sort of enable/disable without regard to usecount approach that the FSIDIV clock was when being used as a PLL pass-through. This forces the enable/disable through the clock framework, which now prevents the clock from being ripped out or modified underneath users that have an existing handle on it. Signed-off-by: Kuninori Morimoto Signed-off-by: Paul Mundt --- arch/arm/mach-shmobile/board-ap4evb.c | 5 +++++ arch/arm/mach-shmobile/clock-sh7372.c | 12 ++---------- 2 files changed, 7 insertions(+), 10 deletions(-) diff --git a/arch/arm/mach-shmobile/board-ap4evb.c b/arch/arm/mach-shmobile/board-ap4evb.c index e084b423146e..d440e5f456ad 100644 --- a/arch/arm/mach-shmobile/board-ap4evb.c +++ b/arch/arm/mach-shmobile/board-ap4evb.c @@ -969,6 +969,11 @@ static int __init hdmi_init_pm_clock(void) goto out; } + ret = clk_enable(&sh7372_pllc2_clk); + if (ret < 0) { + pr_err("Cannot enable pllc2 clock\n"); + goto out; + } pr_debug("PLLC2 set frequency %lu\n", rate); ret = clk_set_parent(hdmi_ick, &sh7372_pllc2_clk); diff --git a/arch/arm/mach-shmobile/clock-sh7372.c b/arch/arm/mach-shmobile/clock-sh7372.c index cbeca2849918..e18a1241a95e 100644 --- a/arch/arm/mach-shmobile/clock-sh7372.c +++ b/arch/arm/mach-shmobile/clock-sh7372.c @@ -230,21 +230,13 @@ static int pllc2_set_rate(struct clk *clk, if (idx < 0) return idx; - if (rate == clk->parent->rate) { - pllc2_disable(clk); - return 0; - } + if (rate == clk->parent->rate) + return -EINVAL; value = __raw_readl(PLLC2CR) & ~(0x3f << 24); - if (value & 0x80000000) - pllc2_disable(clk); - __raw_writel((value & ~0x80000000) | ((idx + 19) << 24), PLLC2CR); - if (value & 0x80000000) - return pllc2_enable(clk); - return 0; } -- cgit v1.2.3 From 08b1a38465cab8c2224a5202c7a3b5e5f5630894 Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Wed, 24 Nov 2010 10:20:33 +0800 Subject: ASoC: wm8961 - clear WM8961_DACSLOPE bit for normal mode DACSLOPE bit of Register 06h ADC and DAC Control 2: 0: Normal mode 1: Sloping stop-band mode Thus in the case of normal mode, we should clear DACSLOPE bit. Signed-off-by: Axel Lin Acked-by: Liam Girdwood Signed-off-by: Mark Brown Cc: stable@kernel.org --- sound/soc/codecs/wm8961.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/wm8961.c b/sound/soc/codecs/wm8961.c index 4f326f604104..93fecf5f94c6 100644 --- a/sound/soc/codecs/wm8961.c +++ b/sound/soc/codecs/wm8961.c @@ -711,7 +711,7 @@ static int wm8961_hw_params(struct snd_pcm_substream *substream, if (fs <= 24000) reg |= WM8961_DACSLOPE; else - reg &= WM8961_DACSLOPE; + reg &= ~WM8961_DACSLOPE; snd_soc_write(codec, WM8961_ADC_DAC_CONTROL_2, reg); return 0; -- cgit v1.2.3 From 2f7dceeda4708f470fd927adb3861bd8ebbe2310 Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Wed, 24 Nov 2010 10:21:54 +0800 Subject: ASoC: wm8961 - clear WM8961_MCLKDIV bit for freq <= 16500000 MCLKDIV bit of Register 04h Clocking1: 0 : Divide by 1 1 : Divide by 2 Thus in the case of freq <= 16500000, we should clear MCLKDIV bit. 
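This and the DACSLOPE fix above are the same single-character bug: "reg &= BIT" keeps only that bit and zeroes the rest, while clearing the bit requires the complement. A minimal illustration (the bit value below is a stand-in, not the real register layout):

#define EXAMPLE_MCLKDIV 0x0001	/* illustrative bit position */

static unsigned int clear_mclkdiv(unsigned int reg)
{
	/* reg &= EXAMPLE_MCLKDIV;     BUG: zeroes every bit *except* MCLKDIV */
	reg &= ~EXAMPLE_MCLKDIV;	/* correct: clears only the MCLKDIV bit */
	return reg;
}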
Signed-off-by: Axel Lin Acked-by: Liam Girdwood Signed-off-by: Mark Brown Cc: stable@kernel.org --- sound/soc/codecs/wm8961.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/wm8961.c b/sound/soc/codecs/wm8961.c index 93fecf5f94c6..8340485c9851 100644 --- a/sound/soc/codecs/wm8961.c +++ b/sound/soc/codecs/wm8961.c @@ -736,7 +736,7 @@ static int wm8961_set_sysclk(struct snd_soc_dai *dai, int clk_id, freq /= 2; } else { dev_dbg(codec->dev, "Using MCLK/1 for %dHz MCLK\n", freq); - reg &= WM8961_MCLKDIV; + reg &= ~WM8961_MCLKDIV; } snd_soc_write(codec, WM8961_CLOCKING1, reg); -- cgit v1.2.3 From 5c12d20145ce30f9f8b7415d36dace5fb4dcc4f0 Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Wed, 24 Nov 2010 15:20:48 +0800 Subject: ASoC: Return proper error for omap3pandora_soc_init Return PTR_ERR(omap3pandora_dac_reg) instead of 0 if regulator_get failed. Signed-off-by: Axel Lin Acked-by: Liam Girdwood Signed-off-by: Mark Brown --- sound/soc/omap/omap3pandora.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/omap/omap3pandora.c b/sound/soc/omap/omap3pandora.c index dbd9d96b5f92..4ee33ce2cb98 100644 --- a/sound/soc/omap/omap3pandora.c +++ b/sound/soc/omap/omap3pandora.c @@ -306,6 +306,7 @@ static int __init omap3pandora_soc_init(void) pr_err(PREFIX "Failed to get DAC regulator from %s: %ld\n", dev_name(&omap3pandora_snd_device->dev), PTR_ERR(omap3pandora_dac_reg)); + ret = PTR_ERR(omap3pandora_dac_reg); goto fail3; } -- cgit v1.2.3 From d6f443ae4c1d54379ad5953d7bcb89a63387184d Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Wed, 24 Nov 2010 16:44:23 +0800 Subject: ASoC: nuc900-ac97: fix a memory leak Signed-off-by: Axel Lin Acked-by: Liam Girdwood Acked-by: Wan ZongShun Signed-off-by: Mark Brown --- sound/soc/nuc900/nuc900-ac97.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/nuc900/nuc900-ac97.c b/sound/soc/nuc900/nuc900-ac97.c index 293dc748797c..e00e39dd6576 100644 --- a/sound/soc/nuc900/nuc900-ac97.c +++ b/sound/soc/nuc900/nuc900-ac97.c @@ -384,7 +384,6 @@ static int __devexit nuc900_ac97_drvremove(struct platform_device *pdev) { - snd_soc_unregister_dai(&pdev->dev); clk_put(nuc900_ac97_data->clk); @@ -392,6 +391,7 @@ static int __devexit nuc900_ac97_drvremove(struct platform_device *pdev) release_mem_region(nuc900_ac97_data->res->start, resource_size(nuc900_ac97_data->res)); + kfree(nuc900_ac97_data); nuc900_ac97_data = NULL; return 0; -- cgit v1.2.3 From cc1c452e509aefc28f7ad2deed75bc69d4f915f7 Mon Sep 17 00:00:00 2001 From: David Henningsson Date: Wed, 24 Nov 2010 14:17:47 +0100 Subject: ALSA: HDA: Add an extra DAC for Realtek ALC887-VD The patch enables ALC887-VD to use the DAC at nid 0x26, which makes it possible to use this DAC for, e.g., Headphone volume.
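The omap3pandora fix above is a classic error-path slip: the failure was logged, but the code jumped to the exit label with 'ret' still holding 0, so the error was silently swallowed. A minimal sketch of the corrected shape (the function name and "vcc" supply id are illustrative, not from the patch):

#include <linux/err.h>
#include <linux/regulator/consumer.h>

static int example_get_supply(struct device *dev)
{
	struct regulator *reg = regulator_get(dev, "vcc");

	if (IS_ERR(reg))
		return PTR_ERR(reg);	/* propagate the real error, not 0 */

	regulator_put(reg);	/* this toy example releases it immediately */
	return 0;
}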
Signed-off-by: David Henningsson Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 8f7530fc7644..b0e6b8b47fa9 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -18997,6 +18997,8 @@ static inline hda_nid_t alc662_mix_to_dac(hda_nid_t nid) return 0x02; else if (nid >= 0x0c && nid <= 0x0e) return nid - 0x0c + 0x02; + else if (nid == 0x26) /* ALC887-VD has this DAC too */ + return 0x25; else return 0; } @@ -19005,7 +19007,7 @@ static inline hda_nid_t alc662_mix_to_dac(hda_nid_t nid) static hda_nid_t alc662_dac_to_mix(struct hda_codec *codec, hda_nid_t pin, hda_nid_t dac) { - hda_nid_t mix[4]; + hda_nid_t mix[5]; int i, num; num = snd_hda_get_connections(codec, pin, mix, ARRAY_SIZE(mix)); -- cgit v1.2.3 From 5b5c1af104ab5adec1be9dcb4c787492d83d8d83 Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Wed, 24 Nov 2010 12:09:41 +0000 Subject: xen: x86/32: perform initial startup on initial_page_table Only make swapper_pg_dir readonly and pinned when generic x86 architecture code (which also starts on initial_page_table) switches to it. This helps ensure that the generic setup paths work on Xen unmodified. In particular clone_pgd_range writes directly to the destination pgd entries and is used to initialise swapper_pg_dir so we need to ensure that it remains writeable until the last possible moment during bring up. This is complicated slightly by the need to avoid sharing kernel PMD entries when running under Xen, therefore the Xen implementation must make a copy of the kernel PMD (which is otherwise referred to by both intial_page_table and swapper_pg_dir) before switching to swapper_pg_dir. Signed-off-by: Ian Campbell Tested-by: Konrad Rzeszutek Wilk Cc: Borislav Petkov Cc: H. Peter Anvin Cc: Jeremy Fitzhardinge Signed-off-by: Konrad Rzeszutek Wilk --- arch/x86/xen/enlighten.c | 2 -- arch/x86/xen/mmu.c | 69 +++++++++++++++++++++++++++++++++++++++--------- 2 files changed, 56 insertions(+), 15 deletions(-) diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 235c0f4d3861..ff82909801b6 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -1191,8 +1191,6 @@ asmlinkage void __init xen_start_kernel(void) /* Allocate and initialize top and mid mfn levels for p2m structure */ xen_build_mfn_list_list(); - init_mm.pgd = pgd; - /* keep using Xen gdt for now; no urgent need to change it */ #ifdef CONFIG_X86_32 diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 21ed8d7f75a5..c9cf23e17440 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -2119,44 +2119,83 @@ __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, return pgd; } #else /* !CONFIG_X86_64 */ -static RESERVE_BRK_ARRAY(pmd_t, level2_kernel_pgt, PTRS_PER_PMD); +static RESERVE_BRK_ARRAY(pmd_t, initial_kernel_pmd, PTRS_PER_PMD); +static RESERVE_BRK_ARRAY(pmd_t, swapper_kernel_pmd, PTRS_PER_PMD); + +static __init void xen_write_cr3_init(unsigned long cr3) +{ + unsigned long pfn = PFN_DOWN(__pa(swapper_pg_dir)); + + BUG_ON(read_cr3() != __pa(initial_page_table)); + BUG_ON(cr3 != __pa(swapper_pg_dir)); + + /* + * We are switching to swapper_pg_dir for the first time (from + * initial_page_table) and therefore need to mark that page + * read-only and then pin it. + * + * Xen disallows sharing of kernel PMDs for PAE + * guests. 
Therefore we must copy the kernel PMD from + * initial_page_table into a new kernel PMD to be used in + * swapper_pg_dir. + */ + swapper_kernel_pmd = + extend_brk(sizeof(pmd_t) * PTRS_PER_PMD, PAGE_SIZE); + memcpy(swapper_kernel_pmd, initial_kernel_pmd, + sizeof(pmd_t) * PTRS_PER_PMD); + swapper_pg_dir[KERNEL_PGD_BOUNDARY] = + __pgd(__pa(swapper_kernel_pmd) | _PAGE_PRESENT); + set_page_prot(swapper_kernel_pmd, PAGE_KERNEL_RO); + + set_page_prot(swapper_pg_dir, PAGE_KERNEL_RO); + xen_write_cr3(cr3); + pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, pfn); + + pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, + PFN_DOWN(__pa(initial_page_table))); + set_page_prot(initial_page_table, PAGE_KERNEL); + set_page_prot(initial_kernel_pmd, PAGE_KERNEL); + + pv_mmu_ops.write_cr3 = &xen_write_cr3; +} __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn) { pmd_t *kernel_pmd; - level2_kernel_pgt = extend_brk(sizeof(pmd_t) * PTRS_PER_PMD, PAGE_SIZE); + initial_kernel_pmd = + extend_brk(sizeof(pmd_t) * PTRS_PER_PMD, PAGE_SIZE); max_pfn_mapped = PFN_DOWN(__pa(xen_start_info->pt_base) + xen_start_info->nr_pt_frames * PAGE_SIZE + 512*1024); kernel_pmd = m2v(pgd[KERNEL_PGD_BOUNDARY].pgd); - memcpy(level2_kernel_pgt, kernel_pmd, sizeof(pmd_t) * PTRS_PER_PMD); + memcpy(initial_kernel_pmd, kernel_pmd, sizeof(pmd_t) * PTRS_PER_PMD); - xen_map_identity_early(level2_kernel_pgt, max_pfn); + xen_map_identity_early(initial_kernel_pmd, max_pfn); - memcpy(swapper_pg_dir, pgd, sizeof(pgd_t) * PTRS_PER_PGD); - set_pgd(&swapper_pg_dir[KERNEL_PGD_BOUNDARY], - __pgd(__pa(level2_kernel_pgt) | _PAGE_PRESENT)); + memcpy(initial_page_table, pgd, sizeof(pgd_t) * PTRS_PER_PGD); + initial_page_table[KERNEL_PGD_BOUNDARY] = + __pgd(__pa(initial_kernel_pmd) | _PAGE_PRESENT); - set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO); - set_page_prot(swapper_pg_dir, PAGE_KERNEL_RO); + set_page_prot(initial_kernel_pmd, PAGE_KERNEL_RO); + set_page_prot(initial_page_table, PAGE_KERNEL_RO); set_page_prot(empty_zero_page, PAGE_KERNEL_RO); pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd))); - xen_write_cr3(__pa(swapper_pg_dir)); - - pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(swapper_pg_dir))); + pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, + PFN_DOWN(__pa(initial_page_table))); + xen_write_cr3(__pa(initial_page_table)); memblock_x86_reserve_range(__pa(xen_start_info->pt_base), __pa(xen_start_info->pt_base + xen_start_info->nr_pt_frames * PAGE_SIZE), "XEN PAGETABLES"); - return swapper_pg_dir; + return initial_page_table; } #endif /* CONFIG_X86_64 */ @@ -2290,7 +2329,11 @@ static const struct pv_mmu_ops xen_mmu_ops __initdata = { .write_cr2 = xen_write_cr2, .read_cr3 = xen_read_cr3, +#ifdef CONFIG_X86_32 + .write_cr3 = xen_write_cr3_init, +#else .write_cr3 = xen_write_cr3, +#endif .flush_tlb_user = xen_flush_tlb, .flush_tlb_kernel = xen_flush_tlb, -- cgit v1.2.3 From e6d4a76dbf2ff27314e09291dfb9e4afcb9ecd60 Mon Sep 17 00:00:00 2001 From: Huang Weiyi Date: Sat, 20 Nov 2010 20:05:46 +0800 Subject: xen: remove duplicated #include Remove duplicated #include('s) in arch/x86/xen/setup.c Signed-off-by: Huang Weiyi Signed-off-by: Konrad Rzeszutek Wilk --- arch/x86/xen/setup.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index 769c4b01fa32..d392486179e7 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -23,7 +23,6 @@ #include #include #include -#include #include #include "xen-ops.h" -- cgit v1.2.3 From 9915672d41273f5b77f1b3c29b391ffb7732b84b Mon Sep 17 00:00:00 2001 From: Eric Dumazet 
Date: Wed, 24 Nov 2010 09:15:27 -0800 Subject: af_unix: limit unix_tot_inflight Vegard Nossum found a unix socket OOM was possible, posting an exploit program. My analysis is that we can eat all LOWMEM memory before unix_gc() is called from unix_release_sock(). Moreover, the thread blocked in unix_gc() can consume a huge amount of time performing cleanup because of the huge working set. One way to handle this is to have a sensible limit on unix_tot_inflight, tested from wait_for_unix_gc(), and to force a call to unix_gc() if this limit is hit. This solves the OOM and also reduces overall latencies, and should not slow down normal workloads. Reported-by: Vegard Nossum Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/unix/garbage.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/net/unix/garbage.c b/net/unix/garbage.c index c8df6fda0b1f..40df93d1cf35 100644 --- a/net/unix/garbage.c +++ b/net/unix/garbage.c @@ -259,9 +259,16 @@ static void inc_inflight_move_tail(struct unix_sock *u) } static bool gc_in_progress = false; +#define UNIX_INFLIGHT_TRIGGER_GC 16000 void wait_for_unix_gc(void) { + /* + * If number of inflight sockets is insane, + * force a garbage collect right now. + */ + if (unix_tot_inflight > UNIX_INFLIGHT_TRIGGER_GC && !gc_in_progress) + unix_gc(); wait_event(unix_gc_wait, gc_in_progress == false); } -- cgit v1.2.3 From e5a06939736277c54a68ae275433db55b99d187c Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Mon, 1 Nov 2010 17:00:37 -0400 Subject: drivers/net/tile/: on-chip network drivers for the tile architecture This change adds the first network driver for the tile architecture, supporting the on-chip XGBE and GBE shims. The infrastructure is present for the TILE-Gx networking drivers (another three source files in the new directory) but for now the actual tilegx sources are waiting on releasing hardware to initial customers. Note that arch/tile/include/hv/* are "upstream" headers from the Tilera hypervisor and will probably benefit less from LKML review.
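For the af_unix change above, a userspace sketch clarifies what an "in flight" socket is: a descriptor travelling inside an SCM_RIGHTS message is pinned by the kernel until the receiver consumes it, and that pinned count is exactly what the new unix_tot_inflight threshold bounds (minimal example, error handling omitted; the helper name is illustrative):

#include <string.h>
#include <sys/socket.h>
#include <sys/uio.h>

static void send_fd(int via, int fd_to_send)
{
	char data = 'x';
	struct iovec iov = { .iov_base = &data, .iov_len = 1 };
	char ctrl[CMSG_SPACE(sizeof(int))];
	struct msghdr msg;
	struct cmsghdr *cmsg;

	memset(ctrl, 0, sizeof(ctrl));
	memset(&msg, 0, sizeof(msg));
	msg.msg_iov = &iov;
	msg.msg_iovlen = 1;
	msg.msg_control = ctrl;
	msg.msg_controllen = sizeof(ctrl);

	cmsg = CMSG_FIRSTHDR(&msg);
	cmsg->cmsg_level = SOL_SOCKET;
	cmsg->cmsg_type = SCM_RIGHTS;
	cmsg->cmsg_len = CMSG_LEN(sizeof(int));
	memcpy(CMSG_DATA(cmsg), &fd_to_send, sizeof(int));

	/* Until the peer reads this message, fd_to_send counts toward
	 * unix_tot_inflight; flooding such messages is the OOM vector. */
	sendmsg(via, &msg, 0);
}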
Signed-off-by: Chris Metcalf --- MAINTAINERS | 1 + arch/tile/include/asm/cacheflush.h | 52 + arch/tile/include/asm/processor.h | 10 + arch/tile/include/hv/drv_xgbe_impl.h | 300 ++++ arch/tile/include/hv/drv_xgbe_intf.h | 615 +++++++ arch/tile/include/hv/netio_errors.h | 122 ++ arch/tile/include/hv/netio_intf.h | 2975 ++++++++++++++++++++++++++++++++++ arch/tile/mm/init.c | 8 +- drivers/net/Kconfig | 12 + drivers/net/Makefile | 1 + drivers/net/tile/Makefile | 10 + drivers/net/tile/tilepro.c | 2406 +++++++++++++++++++++++++++ 12 files changed, 6510 insertions(+), 2 deletions(-) create mode 100644 arch/tile/include/hv/drv_xgbe_impl.h create mode 100644 arch/tile/include/hv/drv_xgbe_intf.h create mode 100644 arch/tile/include/hv/netio_errors.h create mode 100644 arch/tile/include/hv/netio_intf.h create mode 100644 drivers/net/tile/Makefile create mode 100644 drivers/net/tile/tilepro.c diff --git a/MAINTAINERS b/MAINTAINERS index 2525b04f2e25..70b3820aa58e 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5828,6 +5828,7 @@ W: http://www.tilera.com/scm/ S: Supported F: arch/tile/ F: drivers/char/hvc_tile.c +F: drivers/net/tile/ TLAN NETWORK DRIVER M: Samuel Chessman diff --git a/arch/tile/include/asm/cacheflush.h b/arch/tile/include/asm/cacheflush.h index c5741da4eeac..14a3f8556ace 100644 --- a/arch/tile/include/asm/cacheflush.h +++ b/arch/tile/include/asm/cacheflush.h @@ -137,4 +137,56 @@ static inline void finv_buffer(void *buffer, size_t size) mb_incoherent(); } +/* + * Flush & invalidate a VA range that is homed remotely on a single core, + * waiting until the memory controller holds the flushed values. + */ +static inline void finv_buffer_remote(void *buffer, size_t size) +{ + char *p; + int i; + + /* + * Flush and invalidate the buffer out of the local L1/L2 + * and request the home cache to flush and invalidate as well. + */ + __finv_buffer(buffer, size); + + /* + * Wait for the home cache to acknowledge that it has processed + * all the flush-and-invalidate requests. This does not mean + * that the flushed data has reached the memory controller yet, + * but it does mean the home cache is processing the flushes. + */ + __insn_mf(); + + /* + * Issue a load to the last cache line, which can't complete + * until all the previously-issued flushes to the same memory + * controller have also completed. If we weren't striping + * memory, that one load would be sufficient, but since we may + * be, we also need to back up to the last load issued to + * another memory controller, which would be the point where + * we crossed an 8KB boundary (the granularity of striping + * across memory controllers). Keep backing up and doing this + * until we are before the beginning of the buffer, or have + * hit all the controllers. + */ + for (i = 0, p = (char *)buffer + size - 1; + i < (1 << CHIP_LOG_NUM_MSHIMS()) && p >= (char *)buffer; + ++i) { + const unsigned long STRIPE_WIDTH = 8192; + + /* Force a load instruction to issue. */ + *(volatile char *)p; + + /* Jump to end of previous stripe. */ + p -= STRIPE_WIDTH; + p = (char *)((unsigned long)p | (STRIPE_WIDTH - 1)); + } + + /* Wait for the loads (and thus flushes) to have completed. */ + __insn_mf(); +} + #endif /* _ASM_TILE_CACHEFLUSH_H */ diff --git a/arch/tile/include/asm/processor.h b/arch/tile/include/asm/processor.h index 1747ff3946b2..a9e7c8760334 100644 --- a/arch/tile/include/asm/processor.h +++ b/arch/tile/include/asm/processor.h @@ -292,8 +292,18 @@ extern int kstack_hash; /* Are we using huge pages in the TLB for kernel data? 
*/ extern int kdata_huge; +/* Support standard Linux prefetching. */ +#define ARCH_HAS_PREFETCH +#define prefetch(x) __builtin_prefetch(x) #define PREFETCH_STRIDE CHIP_L2_LINE_SIZE() +/* Bring a value into the L1D, faulting the TLB if necessary. */ +#ifdef __tilegx__ +#define prefetch_L1(x) __insn_prefetch_l1_fault((void *)(x)) +#else +#define prefetch_L1(x) __insn_prefetch_L1((void *)(x)) +#endif + #else /* __ASSEMBLY__ */ /* Do some slow action (e.g. read a slow SPR). */ diff --git a/arch/tile/include/hv/drv_xgbe_impl.h b/arch/tile/include/hv/drv_xgbe_impl.h new file mode 100644 index 000000000000..3a73b2b44913 --- /dev/null +++ b/arch/tile/include/hv/drv_xgbe_impl.h @@ -0,0 +1,300 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +/** + * @file drivers/xgbe/impl.h + * Implementation details for the NetIO library. + */ + +#ifndef __DRV_XGBE_IMPL_H__ +#define __DRV_XGBE_IMPL_H__ + +#include +#include +#include + + +/** How many groups we have (log2). */ +#define LOG2_NUM_GROUPS (12) +/** How many groups we have. */ +#define NUM_GROUPS (1 << LOG2_NUM_GROUPS) + +/** Number of output requests we'll buffer per tile. */ +#define EPP_REQS_PER_TILE (32) + +/** Words used in an eDMA command without checksum acceleration. */ +#define EDMA_WDS_NO_CSUM 8 +/** Words used in an eDMA command with checksum acceleration. */ +#define EDMA_WDS_CSUM 10 +/** Total available words in the eDMA command FIFO. */ +#define EDMA_WDS_TOTAL 128 + + +/* + * FIXME: These definitions are internal and should have underscores! + * NOTE: The actual numeric values here are intentional and allow us to + * optimize the concept "if small ... else if large ... else ...", by + * checking for the low bit being set, and then for non-zero. + * These are used as array indices, so they must have the values (0, 1, 2) + * in some order. + */ +#define SIZE_SMALL (1) /**< Small packet queue. */ +#define SIZE_LARGE (2) /**< Large packet queue. */ +#define SIZE_JUMBO (0) /**< Jumbo packet queue. */ + +/** The number of "SIZE_xxx" values. */ +#define NETIO_NUM_SIZES 3 + + +/* + * Default numbers of packets for IPP drivers. These values are chosen + * such that CIPP1 will not overflow its L2 cache. + */ + +/** The default number of small packets. */ +#define NETIO_DEFAULT_SMALL_PACKETS 2750 +/** The default number of large packets. */ +#define NETIO_DEFAULT_LARGE_PACKETS 2500 +/** The default number of jumbo packets. */ +#define NETIO_DEFAULT_JUMBO_PACKETS 250 + + +/** Log2 of the size of a memory arena. */ +#define NETIO_ARENA_SHIFT 24 /* 16 MB */ +/** Size of a memory arena. */ +#define NETIO_ARENA_SIZE (1 << NETIO_ARENA_SHIFT) + + +/** A queue of packets. + * + * This structure partially defines a queue of packets waiting to be + * processed. The queue as a whole is written to by an interrupt handler and + * read by non-interrupt code; this data structure is what's touched by the + * interrupt handler. 
The other part of the queue state, the read offset, is + * kept in user space, not in hypervisor space, so it is in a separate data + * structure. + * + * The read offset (__packet_receive_read in the user part of the queue + * structure) points to the next packet to be read. When the read offset is + * equal to the write offset, the queue is empty; therefore the queue must + * contain one more slot than the required maximum queue size. + * + * Here's an example of all 3 state variables and what they mean. All + * pointers move left to right. + * + * @code + * I I V V V V I I I I + * 0 1 2 3 4 5 6 7 8 9 10 + * ^ ^ ^ ^ + * | | | + * | | __last_packet_plus_one + * | __buffer_write + * __packet_receive_read + * @endcode + * + * This queue has 10 slots, and thus can hold 9 packets (_last_packet_plus_one + * = 10). The read pointer is at 2, and the write pointer is at 6; thus, + * there are valid, unread packets in slots 2, 3, 4, and 5. The remaining + * slots are invalid (do not contain a packet). + */ +typedef struct { + /** Byte offset of the next notify packet to be written: zero for the first + * packet on the queue, sizeof (netio_pkt_t) for the second packet on the + * queue, etc. */ + volatile uint32_t __packet_write; + + /** Offset of the packet after the last valid packet (i.e., when any + * pointer is incremented to this value, it wraps back to zero). */ + uint32_t __last_packet_plus_one; +} +__netio_packet_queue_t; + + +/** A queue of buffers. + * + * This structure partially defines a queue of empty buffers which have been + * obtained via requests to the IPP. (The elements of the queue are packet + * handles, which are transformed into a full netio_pkt_t when the buffer is + * retrieved.) The queue as a whole is written to by an interrupt handler and + * read by non-interrupt code; this data structure is what's touched by the + * interrupt handler. The other parts of the queue state, the read offset and + * requested write offset, are kept in user space, not in hypervisor space, so + * they are in a separate data structure. + * + * The read offset (__buffer_read in the user part of the queue structure) + * points to the next buffer to be read. When the read offset is equal to the + * write offset, the queue is empty; therefore the queue must contain one more + * slot than the required maximum queue size. + * + * The requested write offset (__buffer_requested_write in the user part of + * the queue structure) points to the slot which will hold the next buffer we + * request from the IPP, once we get around to sending such a request. When + * the requested write offset is equal to the write offset, no requests for + * new buffers are outstanding; when the requested write offset is one greater + * than the read offset, no more requests may be sent. + * + * Note that, unlike the packet_queue, the buffer_queue places incoming + * buffers at decreasing addresses. This makes the check for "is it time to + * wrap the buffer pointer" cheaper in the assembly code which receives new + * buffers, and means that the value which defines the queue size, + * __last_buffer, is different than in the packet queue. Also, the offset + * used in the packet_queue is already scaled by the size of a packet; here we + * use unscaled slot indices for the offsets. (These differences are + * historical, and in the future it's possible that the packet_queue will look + * more like this queue.) + * + * @code + * Here's an example of all 4 state variables and what they mean. 
Remember: + * all pointers move right to left. + * + * V V V I I R R V V V + * 0 1 2 3 4 5 6 7 8 9 + * ^ ^ ^ ^ + * | | | | + * | | | __last_buffer + * | | __buffer_write + * | __buffer_requested_write + * __buffer_read + * @endcode + * + * This queue has 10 slots, and thus can hold 9 buffers (_last_buffer = 9). + * The read pointer is at 2, and the write pointer is at 6; thus, there are + * valid, unread buffers in slots 2, 1, 0, 9, 8, and 7. The requested write + * pointer is at 4; thus, requests have been made to the IPP for buffers which + * will be placed in slots 6 and 5 when they arrive. Finally, the remaining + * slots are invalid (do not contain a buffer). + */ +typedef struct +{ + /** Ordinal number of the next buffer to be written: 0 for the first slot in + * the queue, 1 for the second slot in the queue, etc. */ + volatile uint32_t __buffer_write; + + /** Ordinal number of the last buffer (i.e., when any pointer is decremented + * below zero, it is reloaded with this value). */ + uint32_t __last_buffer; +} +__netio_buffer_queue_t; + + +/** + * An object for providing Ethernet packets to a process. + */ +typedef struct __netio_queue_impl_t +{ + /** The queue of packets waiting to be received. */ + __netio_packet_queue_t __packet_receive_queue; + /** The intr bit mask that IDs this device. */ + unsigned int __intr_id; + /** Offset to queues of empty buffers, one per size. */ + uint32_t __buffer_queue[NETIO_NUM_SIZES]; + /** The address of the first EPP tile, or -1 if no EPP. */ + /* ISSUE: Actually this is always "0" or "~0". */ + uint32_t __epp_location; + /** The queue ID that this queue represents. */ + unsigned int __queue_id; + /** Number of acknowledgements received. */ + volatile uint32_t __acks_received; + /** Last completion number received for packet_sendv. */ + volatile uint32_t __last_completion_rcv; + /** Number of packets allowed to be outstanding. */ + uint32_t __max_outstanding; + /** First VA available for packets. */ + void* __va_0; + /** First VA in second range available for packets. */ + void* __va_1; + /** Padding to align the "__packets" field to the size of a netio_pkt_t. */ + uint32_t __padding[3]; + /** The packets themselves. */ + netio_pkt_t __packets[0]; +} +netio_queue_impl_t; + + +/** + * An object for managing the user end of a NetIO queue. + */ +typedef struct __netio_queue_user_impl_t +{ + /** The next incoming packet to be read. */ + uint32_t __packet_receive_read; + /** The next empty buffers to be read, one index per size. */ + uint8_t __buffer_read[NETIO_NUM_SIZES]; + /** Where the empty buffer we next request from the IPP will go, one index + * per size. */ + uint8_t __buffer_requested_write[NETIO_NUM_SIZES]; + /** PCIe interface flag. */ + uint8_t __pcie; + /** Number of packets left to be received before we send a credit update. */ + uint32_t __receive_credit_remaining; + /** Value placed in __receive_credit_remaining when it reaches zero. */ + uint32_t __receive_credit_interval; + /** First fast I/O routine index. */ + uint32_t __fastio_index; + /** Number of acknowledgements expected. */ + uint32_t __acks_outstanding; + /** Last completion number requested. */ + uint32_t __last_completion_req; + /** File descriptor for driver. */ + int __fd; +} +netio_queue_user_impl_t; + + +#define NETIO_GROUP_CHUNK_SIZE 64 /**< Max # groups in one IPP request */ +#define NETIO_BUCKET_CHUNK_SIZE 64 /**< Max # buckets in one IPP request */ + + +/** Internal structure used to convey packet send information to the + * hypervisor. 
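 */

/*
 * A minimal sketch (illustrative only, not part of the original patch;
 * the helper name is made up): advancing an index in the buffer queue
 * described above, which fills toward decreasing slot numbers and
 * reloads from __last_buffer on underflow.
 */
static __inline uint32_t example_buffer_slot_advance(uint32_t slot,
						     uint32_t last_buffer)
{
	/* Decrementing below zero reloads the index with __last_buffer. */
	return (slot == 0) ? last_buffer : slot - 1;
}

/*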
FIXME: Actually, it's not used for that anymore, but + * netio_packet_send() still uses it internally. + */ +typedef struct +{ + uint16_t flags; /**< Packet flags (__NETIO_SEND_FLG_xxx) */ + uint16_t transfer_size; /**< Size of packet */ + uint32_t va; /**< VA of start of packet */ + __netio_pkt_handle_t handle; /**< Packet handle */ + uint32_t csum0; /**< First checksum word */ + uint32_t csum1; /**< Second checksum word */ +} +__netio_send_cmd_t; + + +/** Flags used in two contexts: + * - As the "flags" member in the __netio_send_cmd_t, above; used only + * for netio_pkt_send_{prepare,commit}. + * - As part of the flags passed to the various send packet fast I/O calls. + */ + +/** Need acknowledgement on this packet. Note that some code in the + * normal send_pkt fast I/O handler assumes that this is equal to 1. */ +#define __NETIO_SEND_FLG_ACK 0x1 + +/** Do checksum on this packet. (Only used with the __netio_send_cmd_t; + * normal packet sends use a special fast I/O index to denote checksumming, + * and multi-segment sends test the checksum descriptor.) */ +#define __NETIO_SEND_FLG_CSUM 0x2 + +/** Get a completion on this packet. Only used with multi-segment sends. */ +#define __NETIO_SEND_FLG_COMPLETION 0x4 + +/** Position of the number-of-extra-segments value in the flags word. + Only used with multi-segment sends. */ +#define __NETIO_SEND_FLG_XSEG_SHIFT 3 + +/** Width of the number-of-extra-segments value in the flags word. */ +#define __NETIO_SEND_FLG_XSEG_WIDTH 2 + +#endif /* __DRV_XGBE_IMPL_H__ */ diff --git a/arch/tile/include/hv/drv_xgbe_intf.h b/arch/tile/include/hv/drv_xgbe_intf.h new file mode 100644 index 000000000000..146e47d5334b --- /dev/null +++ b/arch/tile/include/hv/drv_xgbe_intf.h @@ -0,0 +1,615 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +/** + * @file drv_xgbe_intf.h + * Interface to the hypervisor XGBE driver. + */ + +#ifndef __DRV_XGBE_INTF_H__ +#define __DRV_XGBE_INTF_H__ + +/** + * An object for forwarding VAs and PAs to the hypervisor. + * @ingroup types + * + * This allows the supervisor to specify a number of areas of memory to + * store packet buffers. + */ +typedef struct +{ + /** The physical address of the memory. */ + HV_PhysAddr pa; + /** Page table entry for the memory. This is only used to derive the + * memory's caching mode; the PA bits are ignored. */ + HV_PTE pte; + /** The virtual address of the memory. */ + HV_VirtAddr va; + /** Size (in bytes) of the memory area. */ + int size; + +} +netio_ipp_address_t; + +/** The various pread/pwrite offsets into the hypervisor-level driver. + * @ingroup types + */ +typedef enum +{ + /** Inform the Linux driver of the address of the NetIO arena memory. + * This offset is actually only used to convey information from netio + * to the Linux driver; it never makes it from there to the hypervisor. + * Write-only; takes a uint32_t specifying the VA address. */ + NETIO_FIXED_ADDR = 0x5000000000000000ULL, + + /** Inform the Linux driver of the size of the NetIO arena memory. 
+ * This offset is actually only used to convey information from netio + * to the Linux driver; it never makes it from there to the hypervisor. + * Write-only; takes a uint32_t specifying the VA size. */ + NETIO_FIXED_SIZE = 0x5100000000000000ULL, + + /** Register current tile with IPP. Write then read: write, takes a + * netio_input_config_t, read returns a pointer to a netio_queue_impl_t. */ + NETIO_IPP_INPUT_REGISTER_OFF = 0x6000000000000000ULL, + + /** Unregister current tile from IPP. Write-only, takes a dummy argument. */ + NETIO_IPP_INPUT_UNREGISTER_OFF = 0x6100000000000000ULL, + + /** Start packets flowing. Write-only, takes a dummy argument. */ + NETIO_IPP_INPUT_INIT_OFF = 0x6200000000000000ULL, + + /** Stop packets flowing. Write-only, takes a dummy argument. */ + NETIO_IPP_INPUT_UNINIT_OFF = 0x6300000000000000ULL, + + /** Configure group (typically we group on VLAN). Write-only: takes an + * array of netio_group_t's, low 24 bits of the offset is the base group + * number times the size of a netio_group_t. */ + NETIO_IPP_INPUT_GROUP_CFG_OFF = 0x6400000000000000ULL, + + /** Configure bucket. Write-only: takes an array of netio_bucket_t's, low + * 24 bits of the offset is the base bucket number times the size of a + * netio_bucket_t. */ + NETIO_IPP_INPUT_BUCKET_CFG_OFF = 0x6500000000000000ULL, + + /** Get/set a parameter. Read or write: read or write data is the parameter + * value, low 32 bits of the offset is a __netio_getset_offset_t. */ + NETIO_IPP_PARAM_OFF = 0x6600000000000000ULL, + + /** Get fast I/O index. Read-only; returns a 4-byte base index value. */ + NETIO_IPP_GET_FASTIO_OFF = 0x6700000000000000ULL, + + /** Configure hijack IP address. Packets with this IPv4 dest address + * go to bucket NETIO_NUM_BUCKETS - 1. Write-only: takes an IP address + * in some standard form. FIXME: Define the form! */ + NETIO_IPP_INPUT_HIJACK_CFG_OFF = 0x6800000000000000ULL, + + /** + * Offsets beyond this point are reserved for the supervisor (although that + * enforcement must be done by the supervisor driver itself). + */ + NETIO_IPP_USER_MAX_OFF = 0x6FFFFFFFFFFFFFFFULL, + + /** Register I/O memory. Write-only, takes a netio_ipp_address_t. */ + NETIO_IPP_IOMEM_REGISTER_OFF = 0x7000000000000000ULL, + + /** Unregister I/O memory. Write-only, takes a netio_ipp_address_t. */ + NETIO_IPP_IOMEM_UNREGISTER_OFF = 0x7100000000000000ULL, + + /* Offsets greater than 0x7FFFFFFF can't be used directly from Linux + * userspace code due to limitations in the pread/pwrite syscalls. */ + + /** Drain LIPP buffers. */ + NETIO_IPP_DRAIN_OFF = 0xFA00000000000000ULL, + + /** Supply a netio_ipp_address_t to be used as shared memory for the + * LEPP command queue. */ + NETIO_EPP_SHM_OFF = 0xFB00000000000000ULL, + + /* 0xFC... is currently unused. */ + + /** Stop IPP/EPP tiles. Write-only, takes a dummy argument. */ + NETIO_IPP_STOP_SHIM_OFF = 0xFD00000000000000ULL, + + /** Start IPP/EPP tiles. Write-only, takes a dummy argument. */ + NETIO_IPP_START_SHIM_OFF = 0xFE00000000000000ULL, + + /** Supply packet arena. Write-only, takes an array of + * netio_ipp_address_t values. */ + NETIO_IPP_ADDRESS_OFF = 0xFF00000000000000ULL, +} netio_hv_offset_t; + +/** Extract the base offset from an offset */ +#define NETIO_BASE_OFFSET(off) ((off) & 0xFF00000000000000ULL) +/** Extract the local offset from an offset */ +#define NETIO_LOCAL_OFFSET(off) ((off) & 0x00FFFFFFFFFFFFFFULL) + + +/** + * Get/set offset. 
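 */

/*
 * A minimal sketch (illustrative only, not part of the original patch;
 * the helper name is made up): splitting a pread/pwrite offset with the
 * macros above. For example, a group configuration write uses
 * NETIO_IPP_INPUT_GROUP_CFG_OFF as the base and encodes the byte offset
 * of the first group in the local part.
 */
static __inline void example_split_offset(uint64_t off, uint64_t *base,
					  uint64_t *local)
{
	*base = NETIO_BASE_OFFSET(off);    /* top byte: which operation */
	*local = NETIO_LOCAL_OFFSET(off);  /* low 56 bits: its argument */
}

/*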
+ */ +typedef union +{ + struct + { + uint64_t addr:48; /**< Class-specific address */ + unsigned int class:8; /**< Class (e.g., NETIO_PARAM) */ + unsigned int opcode:8; /**< High 8 bits of NETIO_IPP_PARAM_OFF */ + } + bits; /**< Bitfields */ + uint64_t word; /**< Aggregated value to use as the offset */ +} +__netio_getset_offset_t; + +/** + * Fast I/O index offsets (must be contiguous). + */ +typedef enum +{ + NETIO_FASTIO_ALLOCATE = 0, /**< Get empty packet buffer */ + NETIO_FASTIO_FREE_BUFFER = 1, /**< Give buffer back to IPP */ + NETIO_FASTIO_RETURN_CREDITS = 2, /**< Give credits to IPP */ + NETIO_FASTIO_SEND_PKT_NOCK = 3, /**< Send a packet, no checksum */ + NETIO_FASTIO_SEND_PKT_CK = 4, /**< Send a packet, with checksum */ + NETIO_FASTIO_SEND_PKT_VEC = 5, /**< Send a vector of packets */ + NETIO_FASTIO_SENDV_PKT = 6, /**< Sendv one packet */ + NETIO_FASTIO_NUM_INDEX = 7, /**< Total number of fast I/O indices */ +} netio_fastio_index_t; + +/** 3-word return type for Fast I/O call. */ +typedef struct +{ + int err; /**< Error code. */ + uint32_t val0; /**< Value. Meaning depends upon the specific call. */ + uint32_t val1; /**< Value. Meaning depends upon the specific call. */ +} netio_fastio_rv3_t; + +/** 0-argument fast I/O call */ +int __netio_fastio0(uint32_t fastio_index); +/** 1-argument fast I/O call */ +int __netio_fastio1(uint32_t fastio_index, uint32_t arg0); +/** 3-argument fast I/O call, 2-word return value */ +netio_fastio_rv3_t __netio_fastio3_rv3(uint32_t fastio_index, uint32_t arg0, + uint32_t arg1, uint32_t arg2); +/** 4-argument fast I/O call */ +int __netio_fastio4(uint32_t fastio_index, uint32_t arg0, uint32_t arg1, + uint32_t arg2, uint32_t arg3); +/** 6-argument fast I/O call */ +int __netio_fastio6(uint32_t fastio_index, uint32_t arg0, uint32_t arg1, + uint32_t arg2, uint32_t arg3, uint32_t arg4, uint32_t arg5); +/** 9-argument fast I/O call */ +int __netio_fastio9(uint32_t fastio_index, uint32_t arg0, uint32_t arg1, + uint32_t arg2, uint32_t arg3, uint32_t arg4, uint32_t arg5, + uint32_t arg6, uint32_t arg7, uint32_t arg8); + +/** Allocate an empty packet. + * @param fastio_index Fast I/O index. + * @param size Size of the packet to allocate. + */ +#define __netio_fastio_allocate(fastio_index, size) \ + __netio_fastio1((fastio_index) + NETIO_FASTIO_ALLOCATE, size) + +/** Free a buffer. + * @param fastio_index Fast I/O index. + * @param handle Handle for the packet to free. + */ +#define __netio_fastio_free_buffer(fastio_index, handle) \ + __netio_fastio1((fastio_index) + NETIO_FASTIO_FREE_BUFFER, handle) + +/** Increment our receive credits. + * @param fastio_index Fast I/O index. + * @param credits Number of credits to add. + */ +#define __netio_fastio_return_credits(fastio_index, credits) \ + __netio_fastio1((fastio_index) + NETIO_FASTIO_RETURN_CREDITS, credits) + +/** Send packet, no checksum. + * @param fastio_index Fast I/O index. + * @param ackflag Nonzero if we want an ack. + * @param size Size of the packet. + * @param va Virtual address of start of packet. + * @param handle Packet handle. + */ +#define __netio_fastio_send_pkt_nock(fastio_index, ackflag, size, va, handle) \ + __netio_fastio4((fastio_index) + NETIO_FASTIO_SEND_PKT_NOCK, ackflag, \ + size, va, handle) + +/** Send packet, calculate checksum. + * @param fastio_index Fast I/O index. + * @param ackflag Nonzero if we want an ack. + * @param size Size of the packet. + * @param va Virtual address of start of packet. + * @param handle Packet handle. + * @param csum0 Shim checksum header. 
+ * @param csum1 Checksum seed. + */ +#define __netio_fastio_send_pkt_ck(fastio_index, ackflag, size, va, handle, \ + csum0, csum1) \ + __netio_fastio6((fastio_index) + NETIO_FASTIO_SEND_PKT_CK, ackflag, \ + size, va, handle, csum0, csum1) + + +/** Format for the "csum0" argument to the __netio_fastio_send routines + * and LEPP. Note that this is currently exactly identical to the + * ShimProtocolOffloadHeader. + */ +typedef union +{ + struct + { + unsigned int start_byte:7; /**< The first byte to be checksummed */ + unsigned int count:14; /**< Number of bytes to be checksummed. */ + unsigned int destination_byte:7; /**< The byte to write the checksum to. */ + unsigned int reserved:4; /**< Reserved. */ + } bits; /**< Decomposed method of access. */ + unsigned int word; /**< To send out the IDN. */ +} __netio_checksum_header_t; + + +/** Sendv packet with 1 or 2 segments. + * @param fastio_index Fast I/O index. + * @param flags Ack/csum/notify flags in low 3 bits; number of segments minus + * 1 in next 2 bits; expected checksum in high 16 bits. + * @param confno Confirmation number to request, if notify flag set. + * @param csum0 Checksum descriptor; if zero, no checksum. + * @param va_F Virtual address of first segment. + * @param va_L Virtual address of last segment, if 2 segments. + * @param len_F_L Length of first segment in low 16 bits; length of last + * segment, if 2 segments, in high 16 bits. + */ +#define __netio_fastio_sendv_pkt_1_2(fastio_index, flags, confno, csum0, \ + va_F, va_L, len_F_L) \ + __netio_fastio6((fastio_index) + NETIO_FASTIO_SENDV_PKT, flags, confno, \ + csum0, va_F, va_L, len_F_L) + +/** Send packet on PCIe interface. + * @param fastio_index Fast I/O index. + * @param flags Ack/csum/notify flags in low 3 bits. + * @param confno Confirmation number to request, if notify flag set. + * @param csum0 Checksum descriptor; Hard wired 0, not needed for PCIe. + * @param va_F Virtual address of the packet buffer. + * @param va_L Virtual address of last segment, if 2 segments. Hard wired 0. + * @param len_F_L Length of the packet buffer in low 16 bits. + */ +#define __netio_fastio_send_pcie_pkt(fastio_index, flags, confno, csum0, \ + va_F, va_L, len_F_L) \ + __netio_fastio6((fastio_index) + PCIE_FASTIO_SENDV_PKT, flags, confno, \ + csum0, va_F, va_L, len_F_L) + +/** Sendv packet with 3 or 4 segments. + * @param fastio_index Fast I/O index. + * @param flags Ack/csum/notify flags in low 3 bits; number of segments minus + * 1 in next 2 bits; expected checksum in high 16 bits. + * @param confno Confirmation number to request, if notify flag set. + * @param csum0 Checksum descriptor; if zero, no checksum. + * @param va_F Virtual address of first segment. + * @param va_L Virtual address of last segment (third segment if 3 segments, + * fourth segment if 4 segments). + * @param len_F_L Length of first segment in low 16 bits; length of last + * segment in high 16 bits. + * @param va_M0 Virtual address of "middle 0" segment; this segment is sent + * second when there are three segments, and third if there are four. + * @param va_M1 Virtual address of "middle 1" segment; this segment is sent + * second when there are four segments. + * @param len_M0_M1 Length of middle 0 segment in low 16 bits; length of middle + * 1 segment, if 4 segments, in high 16 bits. 
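 */

/*
 * A minimal sketch (illustrative only, not part of the original patch;
 * the helper name is made up): packing the split 16-bit length argument
 * taken by the sendv macros above and below.
 */
static __inline uint32_t example_pack_len_F_L(uint16_t len_first,
					      uint16_t len_last)
{
	/* First-segment length in the low halfword, last in the high. */
	return ((uint32_t)len_last << 16) | len_first;
}

/*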
+ */ +#define __netio_fastio_sendv_pkt_3_4(fastio_index, flags, confno, csum0, va_F, \ + va_L, len_F_L, va_M0, va_M1, len_M0_M1) \ + __netio_fastio9((fastio_index) + NETIO_FASTIO_SENDV_PKT, flags, confno, \ + csum0, va_F, va_L, len_F_L, va_M0, va_M1, len_M0_M1) + +/** Send vector of packets. + * @param fastio_index Fast I/O index. + * @param seqno Number of packets transmitted so far on this interface; + * used to decide which packets should be acknowledged. + * @param nentries Number of entries in vector. + * @param va Virtual address of start of vector entry array. + * @return 3-word netio_fastio_rv3_t structure. The structure's err member + * is an error code, or zero if no error. The val0 member is the + * updated value of seqno; it has been incremented by 1 for each + * packet sent. That increment may be less than nentries if an + * error occured, or if some of the entries in the vector contain + * handles equal to NETIO_PKT_HANDLE_NONE. The val1 member is the + * updated value of nentries; it has been decremented by 1 for each + * vector entry processed. Again, that decrement may be less than + * nentries (leaving the returned value positive) if an error + * occurred. + */ +#define __netio_fastio_send_pkt_vec(fastio_index, seqno, nentries, va) \ + __netio_fastio3_rv3((fastio_index) + NETIO_FASTIO_SEND_PKT_VEC, seqno, \ + nentries, va) + + +/** An egress DMA command for LEPP. */ +typedef struct +{ + /** Is this a TSO transfer? + * + * NOTE: This field is always 0, to distinguish it from + * lepp_tso_cmd_t. It must come first! + */ + uint8_t tso : 1; + + /** Unused padding bits. */ + uint8_t _unused : 3; + + /** Should this packet be sent directly from caches instead of DRAM, + * using hash-for-home to locate the packet data? + */ + uint8_t hash_for_home : 1; + + /** Should we compute a checksum? */ + uint8_t compute_checksum : 1; + + /** Is this the final buffer for this packet? + * + * A single packet can be split over several input buffers (a "gather" + * operation). This flag indicates that this is the last buffer + * in a packet. + */ + uint8_t end_of_packet : 1; + + /** Should LEPP advance 'comp_busy' when this DMA is fully finished? */ + uint8_t send_completion : 1; + + /** High bits of Client Physical Address of the start of the buffer + * to be egressed. + * + * NOTE: Only 6 bits are actually needed here, as CPAs are + * currently 38 bits. So two bits could be scavenged from this. + */ + uint8_t cpa_hi; + + /** The number of bytes to be egressed. */ + uint16_t length; + + /** Low 32 bits of Client Physical Address of the start of the buffer + * to be egressed. + */ + uint32_t cpa_lo; + + /** Checksum information (only used if 'compute_checksum'). */ + __netio_checksum_header_t checksum_data; + +} lepp_cmd_t; + + +/** A chunk of physical memory for a TSO egress. */ +typedef struct +{ + /** The low bits of the CPA. */ + uint32_t cpa_lo; + /** The high bits of the CPA. */ + uint16_t cpa_hi : 15; + /** Should this packet be sent directly from caches instead of DRAM, + * using hash-for-home to locate the packet data? + */ + uint16_t hash_for_home : 1; + /** The length in bytes. */ + uint16_t length; +} lepp_frag_t; + + +/** An LEPP command that handles TSO. */ +typedef struct +{ + /** Is this a TSO transfer? + * + * NOTE: This field is always 1, to distinguish it from + * lepp_cmd_t. It must come first! + */ + uint8_t tso : 1; + + /** Unused padding bits. */ + uint8_t _unused : 7; + + /** Size of the header[] array in bytes. 
It must be in the range + * [40, 127], which are the smallest header for a TCP packet over + * Ethernet and the maximum possible prepend size supported by + * hardware, respectively. Note that the array storage must be + * padded out to a multiple of four bytes so that the following + * LEPP command is aligned properly. + */ + uint8_t header_size; + + /** Byte offset of the IP header in header[]. */ + uint8_t ip_offset; + + /** Byte offset of the TCP header in header[]. */ + uint8_t tcp_offset; + + /** The number of bytes to use for the payload of each packet, + * except of course the last one, which may not have enough bytes. + * This means that each Ethernet packet except the last will have a + * size of header_size + payload_size. + */ + uint16_t payload_size; + + /** The length of the 'frags' array that follows this struct. */ + uint16_t num_frags; + + /** The actual frags. */ + lepp_frag_t frags[0 /* Variable-sized; num_frags entries. */]; + + /* + * The packet header template logically follows frags[], + * but you can't declare that in C. + * + * uint32_t header[header_size_in_words_rounded_up]; + */ + +} lepp_tso_cmd_t; + + +/** An LEPP completion ring entry. */ +typedef void* lepp_comp_t; + + +/** Maximum number of frags for one TSO command. This is adapted from + * linux's "MAX_SKB_FRAGS", and presumably over-estimates by one, for + * our page size of exactly 65536. We add one for a "body" fragment. + */ +#define LEPP_MAX_FRAGS (65536 / HV_PAGE_SIZE_SMALL + 2 + 1) + +/** Total number of bytes needed for an lepp_tso_cmd_t. */ +#define LEPP_TSO_CMD_SIZE(num_frags, header_size) \ + (sizeof(lepp_tso_cmd_t) + \ + (num_frags) * sizeof(lepp_frag_t) + \ + (((header_size) + 3) & -4)) + +/** The size of the lepp "cmd" queue. */ +#define LEPP_CMD_QUEUE_BYTES \ + (((CHIP_L2_CACHE_SIZE() - 2 * CHIP_L2_LINE_SIZE()) / \ + (sizeof(lepp_cmd_t) + sizeof(lepp_comp_t))) * sizeof(lepp_cmd_t)) + +/** The largest possible command that can go in lepp_queue_t::cmds[]. */ +#define LEPP_MAX_CMD_SIZE LEPP_TSO_CMD_SIZE(LEPP_MAX_FRAGS, 128) + +/** The largest possible value of lepp_queue_t::cmd_{head, tail} (inclusive). + */ +#define LEPP_CMD_LIMIT \ + (LEPP_CMD_QUEUE_BYTES - LEPP_MAX_CMD_SIZE) + +/** The maximum number of completions in an LEPP queue. */ +#define LEPP_COMP_QUEUE_SIZE \ + ((LEPP_CMD_LIMIT + sizeof(lepp_cmd_t) - 1) / sizeof(lepp_cmd_t)) + +/** Increment an index modulo the queue size. */ +#define LEPP_QINC(var) \ + (var = __insn_mnz(var - (LEPP_COMP_QUEUE_SIZE - 1), var + 1)) + +/** A queue used to convey egress commands from the client to LEPP. */ +typedef struct +{ + /** Index of first completion not yet processed by user code. + * If this is equal to comp_busy, there are no such completions. + * + * NOTE: This is only read/written by the user. + */ + unsigned int comp_head; + + /** Index of first completion record not yet completed. + * If this is equal to comp_tail, there are no such completions. + * This index gets advanced (modulo LEPP_QUEUE_SIZE) whenever + * a command with the 'completion' bit set is finished. + * + * NOTE: This is only written by LEPP, only read by the user. + */ + volatile unsigned int comp_busy; + + /** Index of the first empty slot in the completion ring. + * Entries from this up to but not including comp_head (in ring order) + * can be filled in with completion data. + * + * NOTE: This is only read/written by the user. + */ + unsigned int comp_tail; + + /** Byte index of first command enqueued for LEPP but not yet processed. 
+ * + * This is always divisible by sizeof(void*) and always <= LEPP_CMD_LIMIT. + * + * NOTE: LEPP advances this counter as soon as it no longer needs + * the cmds[] storage for this entry, but the transfer is not actually + * complete (i.e. the buffer pointed to by the command is no longer + * needed) until comp_busy advances. + * + * If this is equal to cmd_tail, the ring is empty. + * + * NOTE: This is only written by LEPP, only read by the user. + */ + volatile unsigned int cmd_head; + + /** Byte index of first empty slot in the command ring. This field can + * be incremented up to but not equal to cmd_head (because that would + * mean the ring is empty). + * + * This is always divisible by sizeof(void*) and always <= LEPP_CMD_LIMIT. + * + * NOTE: This is read/written by the user, only read by LEPP. + */ + volatile unsigned int cmd_tail; + + /** A ring of variable-sized egress DMA commands. + * + * NOTE: Only written by the user, only read by LEPP. + */ + char cmds[LEPP_CMD_QUEUE_BYTES] + __attribute__((aligned(CHIP_L2_LINE_SIZE()))); + + /** A ring of user completion data. + * NOTE: Only read/written by the user. + */ + lepp_comp_t comps[LEPP_COMP_QUEUE_SIZE] + __attribute__((aligned(CHIP_L2_LINE_SIZE()))); +} lepp_queue_t; + + +/** An internal helper function for determining the number of entries + * available in a ring buffer, given that there is one sentinel. + */ +static inline unsigned int +_lepp_num_free_slots(unsigned int head, unsigned int tail) +{ + /* + * One entry is reserved for use as a sentinel, to distinguish + * "empty" from "full". So we compute + * (head - tail - 1) % LEPP_QUEUE_SIZE, but without using a slow % operation. + */ + return (head - tail - 1) + ((head <= tail) ? LEPP_COMP_QUEUE_SIZE : 0); +} + + +/** Returns how many new comp entries can be enqueued. */ +static inline unsigned int +lepp_num_free_comp_slots(const lepp_queue_t* q) +{ + return _lepp_num_free_slots(q->comp_head, q->comp_tail); +} + +static inline int +lepp_qsub(int v1, int v2) +{ + int delta = v1 - v2; + return delta + ((delta >> 31) & LEPP_COMP_QUEUE_SIZE); +} + + +/** FIXME: Check this from linux, via a new "pwrite()" call. */ +#define LIPP_VERSION 1 + + +/** We use exactly two bytes of alignment padding. */ +#define LIPP_PACKET_PADDING 2 + +/** The minimum size of a "small" buffer (including the padding). */ +#define LIPP_SMALL_PACKET_SIZE 128 + +/* + * NOTE: The following two values should total to less than around + * 13582, to keep the total size used for "lipp_state_t" below 64K. + */ + +/** The maximum number of "small" buffers. + * This is enough for 53 network cpus with 128 credits. Note that + * if these are exhausted, we will fall back to using large buffers. + */ +#define LIPP_SMALL_BUFFERS 6785 + +/** The maximum number of "large" buffers. + * This is enough for 53 network cpus with 128 credits. + */ +#define LIPP_LARGE_BUFFERS 6785 + +#endif /* __DRV_XGBE_INTF_H__ */ diff --git a/arch/tile/include/hv/netio_errors.h b/arch/tile/include/hv/netio_errors.h new file mode 100644 index 000000000000..e1591bff61b5 --- /dev/null +++ b/arch/tile/include/hv/netio_errors.h @@ -0,0 +1,122 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. 
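 */

/*
 * A minimal sketch (illustrative only, not part of the original patch;
 * the helper name and example values are made up): the sentinel
 * arithmetic used by _lepp_num_free_slots() in the previous header.
 * One slot is sacrificed so that head == tail can only mean "empty".
 */
#include <assert.h>

static __inline void example_lepp_free_slots(void)
{
	unsigned int size = 8, head = 2, tail = 6;
	unsigned int free_slots =
		(head - tail - 1) + ((head <= tail) ? size : 0);

	assert(free_slots == 3);	/* slots 6, 7 and 0 may be written */
}

/*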
+ * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +/** + * Error codes returned from NetIO routines. + */ + +#ifndef __NETIO_ERRORS_H__ +#define __NETIO_ERRORS_H__ + +/** + * @addtogroup error + * + * @brief The error codes returned by NetIO functions. + * + * NetIO functions return 0 (defined as ::NETIO_NO_ERROR) on success, and + * a negative value if an error occurs. + * + * In cases where a NetIO function failed due to a error reported by + * system libraries, the error code will be the negation of the + * system errno at the time of failure. The @ref netio_strerror() + * function will deliver error strings for both NetIO and system error + * codes. + * + * @{ + */ + +/** The set of all NetIO errors. */ +typedef enum +{ + /** Operation successfully completed. */ + NETIO_NO_ERROR = 0, + + /** A packet was successfully retrieved from an input queue. */ + NETIO_PKT = 0, + + /** Largest NetIO error number. */ + NETIO_ERR_MAX = -701, + + /** The tile is not registered with the IPP. */ + NETIO_NOT_REGISTERED = -701, + + /** No packet was available to retrieve from the input queue. */ + NETIO_NOPKT = -702, + + /** The requested function is not implemented. */ + NETIO_NOT_IMPLEMENTED = -703, + + /** On a registration operation, the target queue already has the maximum + * number of tiles registered for it, and no more may be added. On a + * packet send operation, the output queue is full and nothing more can + * be queued until some of the queued packets are actually transmitted. */ + NETIO_QUEUE_FULL = -704, + + /** The calling process or thread is not bound to exactly one CPU. */ + NETIO_BAD_AFFINITY = -705, + + /** Cannot allocate memory on requested controllers. */ + NETIO_CANNOT_HOME = -706, + + /** On a registration operation, the IPP specified is not configured + * to support the options requested; for instance, the application + * wants a specific type of tagged headers which the configured IPP + * doesn't support. Or, the supplied configuration information is + * not self-consistent, or is out of range; for instance, specifying + * both NETIO_RECV and NETIO_NO_RECV, or asking for more than + * NETIO_MAX_SEND_BUFFERS to be preallocated. On a VLAN or bucket + * configure operation, the number of items, or the base item, was + * out of range. + */ + NETIO_BAD_CONFIG = -707, + + /** Too many tiles have registered to transmit packets. */ + NETIO_TOOMANY_XMIT = -708, + + /** Packet transmission was attempted on a queue which was registered + with transmit disabled. */ + NETIO_UNREG_XMIT = -709, + + /** This tile is already registered with the IPP. */ + NETIO_ALREADY_REGISTERED = -710, + + /** The Ethernet link is down. The application should try again later. */ + NETIO_LINK_DOWN = -711, + + /** An invalid memory buffer has been specified. This may be an unmapped + * virtual address, or one which does not meet alignment requirements. + * For netio_input_register(), this error may be returned when multiple + * processes specify different memory regions to be used for NetIO + * buffers. That can happen if these processes specify explicit memory + * regions with the ::NETIO_FIXED_BUFFER_VA flag, or if tmc_cmem_init() + * has not been called by a common ancestor of the processes. 
+ */ + NETIO_FAULT = -712, + + /** Cannot combine user-managed shared memory and cache coherence. */ + NETIO_BAD_CACHE_CONFIG = -713, + + /** Smallest NetIO error number. */ + NETIO_ERR_MIN = -713, + +#ifndef __DOXYGEN__ + /** Used internally to mean that no response is needed; never returned to + * an application. */ + NETIO_NO_RESPONSE = 1 +#endif +} netio_error_t; + +/** @} */ + +#endif /* __NETIO_ERRORS_H__ */ diff --git a/arch/tile/include/hv/netio_intf.h b/arch/tile/include/hv/netio_intf.h new file mode 100644 index 000000000000..8d20972aba2c --- /dev/null +++ b/arch/tile/include/hv/netio_intf.h @@ -0,0 +1,2975 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +/** + * NetIO interface structures and macros. + */ + +#ifndef __NETIO_INTF_H__ +#define __NETIO_INTF_H__ + +#include + +#ifdef __KERNEL__ +#include +#else +#include +#endif + +#if !defined(__HV__) && !defined(__BOGUX__) && !defined(__KERNEL__) +#include +#define netio_assert assert /**< Enable assertions from macros */ +#else +#define netio_assert(...) ((void)(0)) /**< Disable assertions from macros */ +#endif + +/* + * If none of these symbols are defined, we're building libnetio in an + * environment where we have pthreads, so we'll enable locking. + */ +#if !defined(__HV__) && !defined(__BOGUX__) && !defined(__KERNEL__) && \ + !defined(__NEWLIB__) +#define _NETIO_PTHREAD /**< Include a mutex in netio_queue_t below */ + +/* + * If NETIO_UNLOCKED is defined, we don't do use per-cpu locks on + * per-packet NetIO operations. We still do pthread locking on things + * like netio_input_register, though. This is used for building + * libnetio_unlocked. + */ +#ifndef NETIO_UNLOCKED + +/* Avoid PLT overhead by using our own inlined per-cpu lock. */ +#include +typedef int _netio_percpu_mutex_t; + +static __inline int +_netio_percpu_mutex_init(_netio_percpu_mutex_t* lock) +{ + *lock = 0; + return 0; +} + +static __inline int +_netio_percpu_mutex_lock(_netio_percpu_mutex_t* lock) +{ + while (__builtin_expect(__insn_tns(lock), 0)) + sched_yield(); + return 0; +} + +static __inline int +_netio_percpu_mutex_unlock(_netio_percpu_mutex_t* lock) +{ + *lock = 0; + return 0; +} + +#else /* NETIO_UNLOCKED */ + +/* Don't do any locking for per-packet NetIO operations. */ +typedef int _netio_percpu_mutex_t; +#define _netio_percpu_mutex_init(L) +#define _netio_percpu_mutex_lock(L) +#define _netio_percpu_mutex_unlock(L) + +#endif /* NETIO_UNLOCKED */ +#endif /* !__HV__, !__BOGUX, !__KERNEL__, !__NEWLIB__ */ + +/** How many tiles can register for a given queue. + * @ingroup setup */ +#define NETIO_MAX_TILES_PER_QUEUE 64 + + +/** Largest permissible queue identifier. + * @ingroup setup */ +#define NETIO_MAX_QUEUE_ID 255 + + +#ifndef __DOXYGEN__ + +/* Metadata packet checksum/ethertype flags. */ + +/** The L4 checksum has not been calculated. 
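 */

/*
 * A minimal sketch (illustrative only, not part of the original patch;
 * the helper names are made up): the per-cpu mutex earlier in this
 * header spins on __insn_tns(), which atomically stores 1 into the word
 * and returns its previous value. A portable rendering with the GCC
 * builtin looks like this; the real lock yields the CPU rather than
 * spinning tightly.
 */
static __inline void example_tns_lock(volatile int *lock)
{
	while (__sync_lock_test_and_set(lock, 1))
		;	/* old value was nonzero: somebody holds the lock */
}

static __inline void example_tns_unlock(volatile int *lock)
{
	__sync_lock_release(lock);	/* store 0, release ordering */
}

/*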
*/ +#define _NETIO_PKT_NO_L4_CSUM_SHIFT 0 +#define _NETIO_PKT_NO_L4_CSUM_RMASK 1 +#define _NETIO_PKT_NO_L4_CSUM_MASK \ + (_NETIO_PKT_NO_L4_CSUM_RMASK << _NETIO_PKT_NO_L4_CSUM_SHIFT) + +/** The L3 checksum has not been calculated. */ +#define _NETIO_PKT_NO_L3_CSUM_SHIFT 1 +#define _NETIO_PKT_NO_L3_CSUM_RMASK 1 +#define _NETIO_PKT_NO_L3_CSUM_MASK \ + (_NETIO_PKT_NO_L3_CSUM_RMASK << _NETIO_PKT_NO_L3_CSUM_SHIFT) + +/** The L3 checksum is incorrect (or perhaps has not been calculated). */ +#define _NETIO_PKT_BAD_L3_CSUM_SHIFT 2 +#define _NETIO_PKT_BAD_L3_CSUM_RMASK 1 +#define _NETIO_PKT_BAD_L3_CSUM_MASK \ + (_NETIO_PKT_BAD_L3_CSUM_RMASK << _NETIO_PKT_BAD_L3_CSUM_SHIFT) + +/** The Ethernet packet type is unrecognized. */ +#define _NETIO_PKT_TYPE_UNRECOGNIZED_SHIFT 3 +#define _NETIO_PKT_TYPE_UNRECOGNIZED_RMASK 1 +#define _NETIO_PKT_TYPE_UNRECOGNIZED_MASK \ + (_NETIO_PKT_TYPE_UNRECOGNIZED_RMASK << \ + _NETIO_PKT_TYPE_UNRECOGNIZED_SHIFT) + +/* Metadata packet type flags. */ + +/** Where the packet type bits are; this field is the index into + * _netio_pkt_info. */ +#define _NETIO_PKT_TYPE_SHIFT 4 +#define _NETIO_PKT_TYPE_RMASK 0x3F + +/** How many VLAN tags the packet has, and, if we have two, which one we + * actually grouped on. A VLAN within a proprietary (Marvell or Broadcom) + * tag is counted here. */ +#define _NETIO_PKT_VLAN_SHIFT 4 +#define _NETIO_PKT_VLAN_RMASK 0x3 +#define _NETIO_PKT_VLAN_MASK \ + (_NETIO_PKT_VLAN_RMASK << _NETIO_PKT_VLAN_SHIFT) +#define _NETIO_PKT_VLAN_NONE 0 /* No VLAN tag. */ +#define _NETIO_PKT_VLAN_ONE 1 /* One VLAN tag. */ +#define _NETIO_PKT_VLAN_TWO_OUTER 2 /* Two VLAN tags, outer one used. */ +#define _NETIO_PKT_VLAN_TWO_INNER 3 /* Two VLAN tags, inner one used. */ + +/** Which proprietary tags the packet has. */ +#define _NETIO_PKT_TAG_SHIFT 6 +#define _NETIO_PKT_TAG_RMASK 0x3 +#define _NETIO_PKT_TAG_MASK \ + (_NETIO_PKT_TAG_RMASK << _NETIO_PKT_TAG_SHIFT) +#define _NETIO_PKT_TAG_NONE 0 /* No proprietary tags. */ +#define _NETIO_PKT_TAG_MRVL 1 /* Marvell HyperG.Stack tags. */ +#define _NETIO_PKT_TAG_MRVL_EXT 2 /* HyperG.Stack extended tags. */ +#define _NETIO_PKT_TAG_BRCM 3 /* Broadcom HiGig tags. */ + +/** Whether a packet has an LLC + SNAP header. */ +#define _NETIO_PKT_SNAP_SHIFT 8 +#define _NETIO_PKT_SNAP_RMASK 0x1 +#define _NETIO_PKT_SNAP_MASK \ + (_NETIO_PKT_SNAP_RMASK << _NETIO_PKT_SNAP_SHIFT) + +/* NOTE: Bits 9 and 10 are unused. */ + +/** Length of any custom data before the L2 header, in words. */ +#define _NETIO_PKT_CUSTOM_LEN_SHIFT 11 +#define _NETIO_PKT_CUSTOM_LEN_RMASK 0x1F +#define _NETIO_PKT_CUSTOM_LEN_MASK \ + (_NETIO_PKT_CUSTOM_LEN_RMASK << _NETIO_PKT_CUSTOM_LEN_SHIFT) + +/** The L4 checksum is incorrect (or perhaps has not been calculated). */ +#define _NETIO_PKT_BAD_L4_CSUM_SHIFT 16 +#define _NETIO_PKT_BAD_L4_CSUM_RMASK 0x1 +#define _NETIO_PKT_BAD_L4_CSUM_MASK \ + (_NETIO_PKT_BAD_L4_CSUM_RMASK << _NETIO_PKT_BAD_L4_CSUM_SHIFT) + +/** Length of the L2 header, in words. */ +#define _NETIO_PKT_L2_LEN_SHIFT 17 +#define _NETIO_PKT_L2_LEN_RMASK 0x1F +#define _NETIO_PKT_L2_LEN_MASK \ + (_NETIO_PKT_L2_LEN_RMASK << _NETIO_PKT_L2_LEN_SHIFT) + + +/* Flags in minimal packet metadata. */ + +/** We need an eDMA checksum on this packet. */ +#define _NETIO_PKT_NEED_EDMA_CSUM_SHIFT 0 +#define _NETIO_PKT_NEED_EDMA_CSUM_RMASK 1 +#define _NETIO_PKT_NEED_EDMA_CSUM_MASK \ + (_NETIO_PKT_NEED_EDMA_CSUM_RMASK << _NETIO_PKT_NEED_EDMA_CSUM_SHIFT) + +/* Data within the packet information table. 
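 */

/*
 * A minimal sketch (illustrative only, not part of the original patch;
 * the helper name is made up): pulling one of the fields above out of a
 * metadata flags word, here the VLAN-tag descriptor.
 */
static __inline unsigned int example_vlan_field(unsigned int flags)
{
	/* Yields one of _NETIO_PKT_VLAN_{NONE,ONE,TWO_OUTER,TWO_INNER}. */
	return (flags & _NETIO_PKT_VLAN_MASK) >> _NETIO_PKT_VLAN_SHIFT;
}

/*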
*/ + +/* Note that, for efficiency, code which uses these fields assumes that none + * of the shift values below are zero. See uses below for an explanation. */ + +/** Offset within the L2 header of the innermost ethertype (in halfwords). */ +#define _NETIO_PKT_INFO_ETYPE_SHIFT 6 +#define _NETIO_PKT_INFO_ETYPE_RMASK 0x1F + +/** Offset within the L2 header of the VLAN tag (in halfwords). */ +#define _NETIO_PKT_INFO_VLAN_SHIFT 11 +#define _NETIO_PKT_INFO_VLAN_RMASK 0x1F + +#endif + + +/** The size of a memory buffer representing a small packet. + * @ingroup egress */ +#define SMALL_PACKET_SIZE 256 + +/** The size of a memory buffer representing a large packet. + * @ingroup egress */ +#define LARGE_PACKET_SIZE 2048 + +/** The size of a memory buffer representing a jumbo packet. + * @ingroup egress */ +#define JUMBO_PACKET_SIZE (12 * 1024) + + +/* Common ethertypes. + * @ingroup ingress */ +/** @{ */ +/** The ethertype of IPv4. */ +#define ETHERTYPE_IPv4 (0x0800) +/** The ethertype of ARP. */ +#define ETHERTYPE_ARP (0x0806) +/** The ethertype of VLANs. */ +#define ETHERTYPE_VLAN (0x8100) +/** The ethertype of a Q-in-Q header. */ +#define ETHERTYPE_Q_IN_Q (0x9100) +/** The ethertype of IPv6. */ +#define ETHERTYPE_IPv6 (0x86DD) +/** The ethertype of MPLS. */ +#define ETHERTYPE_MPLS (0x8847) +/** @} */ + + +/** The possible return values of NETIO_PKT_STATUS. + * @ingroup ingress + */ +typedef enum +{ + /** No problems were detected with this packet. */ + NETIO_PKT_STATUS_OK, + /** The packet is undersized; this is expected behavior if the packet's + * ethertype is unrecognized, but otherwise the packet is likely corrupt. */ + NETIO_PKT_STATUS_UNDERSIZE, + /** The packet is oversized and some trailing bytes have been discarded. + This is expected behavior for short packets, since it's impossible to + precisely determine the amount of padding which may have been added to + them to make them meet the minimum Ethernet packet size. */ + NETIO_PKT_STATUS_OVERSIZE, + /** The packet was judged to be corrupt by hardware (for instance, it had + a bad CRC, or part of it was discarded due to lack of buffer space in + the I/O shim) and should be discarded. */ + NETIO_PKT_STATUS_BAD +} netio_pkt_status_t; + + +/** Log2 of how many buckets we have. */ +#define NETIO_LOG2_NUM_BUCKETS (10) + +/** How many buckets we have. + * @ingroup ingress */ +#define NETIO_NUM_BUCKETS (1 << NETIO_LOG2_NUM_BUCKETS) + + +/** + * @brief A group-to-bucket identifier. + * + * @ingroup setup + * + * This tells us what to do with a given group. + */ +typedef union { + /** The header broken down into bits. */ + struct { + /** Whether we should balance on L4, if available */ + unsigned int __balance_on_l4:1; + /** Whether we should balance on L3, if available */ + unsigned int __balance_on_l3:1; + /** Whether we should balance on L2, if available */ + unsigned int __balance_on_l2:1; + /** Reserved for future use */ + unsigned int __reserved:1; + /** The base bucket to use to send traffic */ + unsigned int __bucket_base:NETIO_LOG2_NUM_BUCKETS; + /** The mask to apply to the balancing value. This must be one less + * than a power of two, e.g. 0x3 or 0xFF. + */ + unsigned int __bucket_mask:NETIO_LOG2_NUM_BUCKETS; + /** Pad to 32 bits */ + unsigned int __padding:(32 - 4 - 2 * NETIO_LOG2_NUM_BUCKETS); + } bits; + /** To send out the IDN. */ + unsigned int word; +} +netio_group_t; + + +/** + * @brief A VLAN-to-bucket identifier. + * + * @ingroup setup + * + * This tells us what to do with a given VLAN. 
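 */

/*
 * A minimal sketch (illustrative only, not part of the original patch;
 * the helper name and all values are made up): filling in a
 * netio_group_t that balances on L3/L4 across 16 buckets starting at
 * bucket 32.
 */
static __inline netio_group_t example_group_config(void)
{
	netio_group_t g;

	g.word = 0;
	g.bits.__balance_on_l4 = 1;
	g.bits.__balance_on_l3 = 1;
	g.bits.__bucket_base = 32;
	g.bits.__bucket_mask = 0xF;	/* one less than a power of two */
	return g;
}

/*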
+ */ +typedef netio_group_t netio_vlan_t; + + +/** + * A bucket-to-queue mapping. + * @ingroup setup + */ +typedef unsigned char netio_bucket_t; + + +/** + * A packet size can always fit in a netio_size_t. + * @ingroup setup + */ +typedef unsigned int netio_size_t; + + +/** + * @brief Ethernet standard (ingress) packet metadata. + * + * @ingroup ingress + * + * This is additional data associated with each packet. + * This structure is opaque and accessed through the @ref ingress. + * + * Also, the buffer population operation currently assumes that standard + * metadata is at least as large as minimal metadata, and will need to be + * modified if that is no longer the case. + */ +typedef struct +{ +#ifdef __DOXYGEN__ + /** This structure is opaque. */ + unsigned char opaque[24]; +#else + /** The overall ordinal of the packet */ + unsigned int __packet_ordinal; + /** The ordinal of the packet within the group */ + unsigned int __group_ordinal; + /** The best flow hash IPP could compute. */ + unsigned int __flow_hash; + /** Flags pertaining to checksum calculation, packet type, etc. */ + unsigned int __flags; + /** The first word of "user data". */ + unsigned int __user_data_0; + /** The second word of "user data". */ + unsigned int __user_data_1; +#endif +} +netio_pkt_metadata_t; + + +/** To ensure that the L3 header is aligned mod 4, the L2 header should be + * aligned mod 4 plus 2, since every supported L2 header is 4n + 2 bytes + * long. The standard way to do this is to simply add 2 bytes of padding + * before the L2 header. + */ +#define NETIO_PACKET_PADDING 2 + + + +/** + * @brief Ethernet minimal (egress) packet metadata. + * + * @ingroup egress + * + * This structure represents information about packets which have + * been processed by @ref netio_populate_buffer() or + * @ref netio_populate_prepend_buffer(). This structure is opaque + * and accessed through the @ref egress. + * + * @internal This structure is actually copied into the memory used by + * standard metadata, which is assumed to be large enough. + */ +typedef struct +{ +#ifdef __DOXYGEN__ + /** This structure is opaque. */ + unsigned char opaque[14]; +#else + /** The offset of the L2 header from the start of the packet data. */ + unsigned short l2_offset; + /** The offset of the L3 header from the start of the packet data. */ + unsigned short l3_offset; + /** Where to write the checksum. */ + unsigned char csum_location; + /** Where to start checksumming from. */ + unsigned char csum_start; + /** Flags pertaining to checksum calculation etc. */ + unsigned short flags; + /** The L2 length of the packet. */ + unsigned short l2_length; + /** The checksum with which to seed the checksum generator. */ + unsigned short csum_seed; + /** How much to checksum. */ + unsigned short csum_length; +#endif +} +netio_pkt_minimal_metadata_t; + + +#ifndef __DOXYGEN__ + +/** + * @brief An I/O notification header. + * + * This is the first word of data received from an I/O shim in a notification + * packet. It contains framing and status information. + */ +typedef union +{ + unsigned int word; /**< The whole word. */ + /** The various fields. */ + struct + { + unsigned int __channel:7; /**< Resource channel. */ + unsigned int __type:4; /**< Type. */ + unsigned int __ack:1; /**< Whether an acknowledgement is needed. */ + unsigned int __reserved:1; /**< Reserved. */ + unsigned int __protocol:1; /**< A protocol-specific word is added. */ + unsigned int __status:2; /**< Status of the transfer. 
*/ + unsigned int __framing:2; /**< Framing of the transfer. */ + unsigned int __transfer_size:14; /**< Transfer size in bytes (total). */ + } bits; +} +__netio_pkt_notif_t; + + +/** + * Returns the base address of the packet. + */ +#define _NETIO_PKT_HANDLE_BASE(p) \ + ((unsigned char*)((p).word & 0xFFFFFFC0)) + +/** + * Returns the base address of the packet. + */ +#define _NETIO_PKT_BASE(p) \ + _NETIO_PKT_HANDLE_BASE(p->__packet) + +/** + * @brief An I/O notification packet (second word) + * + * This is the second word of data received from an I/O shim in a notification + * packet. This is the virtual address of the packet buffer, plus some flag + * bits. (The virtual address of the packet is always 256-byte aligned so we + * have room for 8 bits' worth of flags in the low 8 bits.) + * + * @internal + * NOTE: The low two bits must contain "__queue", so the "packet size" + * (SIZE_SMALL, SIZE_LARGE, or SIZE_JUMBO) can be determined quickly. + * + * If __addr or __offset are moved, _NETIO_PKT_BASE + * (defined right below this) must be changed. + */ +typedef union +{ + unsigned int word; /**< The whole word. */ + /** The various fields. */ + struct + { + /** Which queue the packet will be returned to once it is sent back to + the IPP. This is one of the SIZE_xxx values. */ + unsigned int __queue:2; + + /** The IPP handle of the sending IPP. */ + unsigned int __ipp_handle:2; + + /** Reserved for future use. */ + unsigned int __reserved:1; + + /** If 1, this packet has minimal (egress) metadata; otherwise, it + has standard (ingress) metadata. */ + unsigned int __minimal:1; + + /** Offset of the metadata within the packet. This value is multiplied + * by 64 and added to the base packet address to get the metadata + * address. Note that this field is aligned within the word such that + * you can easily extract the metadata address with a 26-bit mask. */ + unsigned int __offset:2; + + /** The top 24 bits of the packet's virtual address. */ + unsigned int __addr:24; + } bits; +} +__netio_pkt_handle_t; + +#endif /* !__DOXYGEN__ */ + + +/** + * @brief A handle for an I/O packet's storage. + * @ingroup ingress + * + * netio_pkt_handle_t encodes the concept of a ::netio_pkt_t with its + * packet metadata removed. It is a much smaller type that exists to + * facilitate applications where the full ::netio_pkt_t type is too + * large, such as those that cache enormous numbers of packets or wish + * to transmit packet descriptors over the UDN. + * + * Because there is no metadata, most ::netio_pkt_t operations cannot be + * performed on a netio_pkt_handle_t. It supports only + * netio_free_handle() (to free the buffer) and + * NETIO_PKT_CUSTOM_DATA_H() (to access a pointer to its contents). + * The application must acquire any additional metadata it wants from the + * original ::netio_pkt_t and record it separately. + * + * A netio_pkt_handle_t can be extracted from a ::netio_pkt_t by calling + * NETIO_PKT_HANDLE(). An invalid handle (analogous to NULL) can be + * created by assigning the value ::NETIO_PKT_HANDLE_NONE. A handle can + * be tested for validity with NETIO_PKT_HANDLE_IS_VALID(). + */ +typedef struct +{ + unsigned int word; /**< Opaque bits. */ +} netio_pkt_handle_t; + +/** + * @brief A packet descriptor. + * + * @ingroup ingress + * @ingroup egress + * + * This data structure represents a packet. The structure is manipulated + * through the @ref ingress and the @ref egress. + * + * While the contents of a netio_pkt_t are opaque, the structure itself is + * portable. 
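 */

/*
 * A minimal sketch (illustrative only, not part of the original patch;
 * the helper name is made up): because buffers are 256-byte aligned,
 * the low bits of a handle word are free for flags, and masking them
 * off recovers the buffer address, as _NETIO_PKT_HANDLE_BASE() does
 * above and NETIO_PKT_CUSTOM_DATA_H() does later in this header.
 */
static __inline unsigned char *example_handle_to_custom_data(
	netio_pkt_handle_t h)
{
	/* Mask off the flag bits, then skip the two padding bytes that
	 * keep the L3 header word-aligned. */
	return (unsigned char *)(unsigned long)(h.word & 0xFFFFFFC0) +
		NETIO_PACKET_PADDING;
}

/*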
This means that it may be shared between all tiles which have + * done a netio_input_register() call for the interface on which the pkt_t + * was initially received (via netio_get_packet()) or retrieved (via + * netio_get_buffer()). The contents of a netio_pkt_t can be transmitted to + * another tile via shared memory, or via a UDN message, or by other means. + * The destination tile may then use the pkt_t as if it had originally been + * received locally; it may read or write the packet's data, read its + * metadata, free the packet, send the packet, transfer the netio_pkt_t to + * yet another tile, and so forth. + * + * Once a netio_pkt_t has been transferred to a second tile, the first tile + * should not reference the original copy; in particular, if more than one + * tile frees or sends the same netio_pkt_t, the IPP's packet free lists will + * become corrupted. Note also that each tile which reads or modifies + * packet data must obey the memory coherency rules outlined in @ref input. + */ +typedef struct +{ +#ifdef __DOXYGEN__ + /** This structure is opaque. */ + unsigned char opaque[32]; +#else + /** For an ingress packet (one with standard metadata), this is the + * notification header we got from the I/O shim. For an egress packet + * (one with minimal metadata), this word is zero if the packet has not + * been populated, and nonzero if it has. */ + __netio_pkt_notif_t __notif_header; + + /** Virtual address of the packet buffer, plus state flags. */ + __netio_pkt_handle_t __packet; + + /** Metadata associated with the packet. */ + netio_pkt_metadata_t __metadata; +#endif +} +netio_pkt_t; + + +#ifndef __DOXYGEN__ + +#define __NETIO_PKT_NOTIF_HEADER(pkt) ((pkt)->__notif_header) +#define __NETIO_PKT_IPP_HANDLE(pkt) ((pkt)->__packet.bits.__ipp_handle) +#define __NETIO_PKT_QUEUE(pkt) ((pkt)->__packet.bits.__queue) +#define __NETIO_PKT_NOTIF_HEADER_M(mda, pkt) ((pkt)->__notif_header) +#define __NETIO_PKT_IPP_HANDLE_M(mda, pkt) ((pkt)->__packet.bits.__ipp_handle) +#define __NETIO_PKT_MINIMAL(pkt) ((pkt)->__packet.bits.__minimal) +#define __NETIO_PKT_QUEUE_M(mda, pkt) ((pkt)->__packet.bits.__queue) +#define __NETIO_PKT_FLAGS_M(mda, pkt) ((mda)->__flags) + +/* Packet information table, used by the attribute access functions below. */ +extern const uint16_t _netio_pkt_info[]; + +#endif /* __DOXYGEN__ */ + + +#ifndef __DOXYGEN__ +/* These macros are deprecated and will disappear in a future MDE release. */ +#define NETIO_PKT_GOOD_CHECKSUM(pkt) \ + NETIO_PKT_L4_CSUM_CORRECT(pkt) +#define NETIO_PKT_GOOD_CHECKSUM_M(mda, pkt) \ + NETIO_PKT_L4_CSUM_CORRECT_M(mda, pkt) +#endif /* __DOXYGEN__ */ + + +/* Packet attribute access functions. */ + +/** Return a pointer to the metadata for a packet. + * @ingroup ingress + * + * Calling this function once and passing the result to other retrieval + * functions with a "_M" suffix usually improves performance. This + * function must be called on an 'ingress' packet (i.e. one retrieved + * by @ref netio_get_packet(), on which @ref netio_populate_buffer() or + * @ref netio_populate_prepend_buffer have not been called). Use of this + * function on an 'egress' packet will cause an assertion failure. + * + * @param[in] pkt Packet on which to operate. + * @return A pointer to the packet's standard metadata. + */ +static __inline netio_pkt_metadata_t* +NETIO_PKT_METADATA(netio_pkt_t* pkt) +{ + netio_assert(!pkt->__packet.bits.__minimal); + return &pkt->__metadata; +} + + +/** Return a pointer to the minimal metadata for a packet. 
+ * @ingroup egress + * + * Calling this function once and passing the result to other retrieval + * functions with a "_MM" suffix usually improves performance. This + * function must be called on an 'egress' packet (i.e. one on which + * @ref netio_populate_buffer() or @ref netio_populate_prepend_buffer() + * have been called, or one retrieved by @ref netio_get_buffer()). Use of + * this function on an 'ingress' packet will cause an assertion failure. + * + * @param[in] pkt Packet on which to operate. + * @return A pointer to the packet's standard metadata. + */ +static __inline netio_pkt_minimal_metadata_t* +NETIO_PKT_MINIMAL_METADATA(netio_pkt_t* pkt) +{ + netio_assert(pkt->__packet.bits.__minimal); + return (netio_pkt_minimal_metadata_t*) &pkt->__metadata; +} + + +/** Determine whether a packet has 'minimal' metadata. + * @ingroup pktfuncs + * + * This function will return nonzero if the packet is an 'egress' + * packet (i.e. one on which @ref netio_populate_buffer() or + * @ref netio_populate_prepend_buffer() have been called, or one + * retrieved by @ref netio_get_buffer()), and zero if the packet + * is an 'ingress' packet (i.e. one retrieved by @ref netio_get_packet(), + * which has not been converted into an 'egress' packet). + * + * @param[in] pkt Packet on which to operate. + * @return Nonzero if the packet has minimal metadata. + */ +static __inline unsigned int +NETIO_PKT_IS_MINIMAL(netio_pkt_t* pkt) +{ + return pkt->__packet.bits.__minimal; +} + + +/** Return a handle for a packet's storage. + * @ingroup pktfuncs + * + * @param[in] pkt Packet on which to operate. + * @return A handle for the packet's storage. + */ +static __inline netio_pkt_handle_t +NETIO_PKT_HANDLE(netio_pkt_t* pkt) +{ + netio_pkt_handle_t h; + h.word = pkt->__packet.word; + return h; +} + + +/** A special reserved value indicating the absence of a packet handle. + * + * @ingroup pktfuncs + */ +#define NETIO_PKT_HANDLE_NONE ((netio_pkt_handle_t) { 0 }) + + +/** Test whether a packet handle is valid. + * + * Applications may wish to use the reserved value NETIO_PKT_HANDLE_NONE + * to indicate no packet at all. This function tests to see if a packet + * handle is a real handle, not this special reserved value. + * + * @ingroup pktfuncs + * + * @param[in] handle Handle on which to operate. + * @return One if the packet handle is valid, else zero. + */ +static __inline unsigned int +NETIO_PKT_HANDLE_IS_VALID(netio_pkt_handle_t handle) +{ + return handle.word != 0; +} + + + +/** Return a pointer to the start of the packet's custom header. + * A custom header may or may not be present, depending upon the IPP; its + * contents and alignment are also IPP-dependent. Currently, none of the + * standard IPPs supplied by Tilera produce a custom header. If present, + * the custom header precedes the L2 header in the packet buffer. + * @ingroup ingress + * + * @param[in] handle Handle on which to operate. + * @return A pointer to start of the packet. + */ +static __inline unsigned char* +NETIO_PKT_CUSTOM_DATA_H(netio_pkt_handle_t handle) +{ + return _NETIO_PKT_HANDLE_BASE(handle) + NETIO_PACKET_PADDING; +} + + +/** Return the length of the packet's custom header. + * A custom header may or may not be present, depending upon the IPP; its + * contents and alignment are also IPP-dependent. Currently, none of the + * standard IPPs supplied by Tilera produce a custom header. If present, + * the custom header precedes the L2 header in the packet buffer. 
+ * + * @ingroup ingress + * + * @param[in] mda Pointer to packet's standard metadata. + * @param[in] pkt Packet on which to operate. + * @return The length of the packet's custom header, in bytes. + */ +static __inline netio_size_t +NETIO_PKT_CUSTOM_HEADER_LENGTH_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) +{ + /* + * Note that we effectively need to extract a quantity from the flags word + * which is measured in words, and then turn it into bytes by shifting + * it left by 2. We do this all at once by just shifting right two less + * bits, and shifting the mask up two bits. + */ + return ((mda->__flags >> (_NETIO_PKT_CUSTOM_LEN_SHIFT - 2)) & + (_NETIO_PKT_CUSTOM_LEN_RMASK << 2)); +} + + +/** Return the length of the packet, starting with the custom header. + * A custom header may or may not be present, depending upon the IPP; its + * contents and alignment are also IPP-dependent. Currently, none of the + * standard IPPs supplied by Tilera produce a custom header. If present, + * the custom header precedes the L2 header in the packet buffer. + * @ingroup ingress + * + * @param[in] mda Pointer to packet's standard metadata. + * @param[in] pkt Packet on which to operate. + * @return The length of the packet, in bytes. + */ +static __inline netio_size_t +NETIO_PKT_CUSTOM_LENGTH_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) +{ + return (__NETIO_PKT_NOTIF_HEADER(pkt).bits.__transfer_size - + NETIO_PACKET_PADDING); +} + + +/** Return a pointer to the start of the packet's custom header. + * A custom header may or may not be present, depending upon the IPP; its + * contents and alignment are also IPP-dependent. Currently, none of the + * standard IPPs supplied by Tilera produce a custom header. If present, + * the custom header precedes the L2 header in the packet buffer. + * @ingroup ingress + * + * @param[in] mda Pointer to packet's standard metadata. + * @param[in] pkt Packet on which to operate. + * @return A pointer to start of the packet. + */ +static __inline unsigned char* +NETIO_PKT_CUSTOM_DATA_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) +{ + return NETIO_PKT_CUSTOM_DATA_H(NETIO_PKT_HANDLE(pkt)); +} + + +/** Return the length of the packet's L2 (Ethernet plus VLAN or SNAP) header. + * @ingroup ingress + * + * @param[in] mda Pointer to packet's standard metadata. + * @param[in] pkt Packet on which to operate. + * @return The length of the packet's L2 header, in bytes. + */ +static __inline netio_size_t +NETIO_PKT_L2_HEADER_LENGTH_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) +{ + /* + * Note that we effectively need to extract a quantity from the flags word + * which is measured in words, and then turn it into bytes by shifting + * it left by 2. We do this all at once by just shifting right two less + * bits, and shifting the mask up two bits. We then add two bytes. + */ + return ((mda->__flags >> (_NETIO_PKT_L2_LEN_SHIFT - 2)) & + (_NETIO_PKT_L2_LEN_RMASK << 2)) + 2; +} + + +/** Return the length of the packet, starting with the L2 (Ethernet) header. + * @ingroup ingress + * + * @param[in] mda Pointer to packet's standard metadata. + * @param[in] pkt Packet on which to operate. + * @return The length of the packet, in bytes. + */ +static __inline netio_size_t +NETIO_PKT_L2_LENGTH_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) +{ + return (NETIO_PKT_CUSTOM_LENGTH_M(mda, pkt) - + NETIO_PKT_CUSTOM_HEADER_LENGTH_M(mda,pkt)); +} + + +/** Return a pointer to the start of the packet's L2 (Ethernet) header. 
+ * @ingroup ingress + * + * @param[in] mda Pointer to packet's standard metadata. + * @param[in] pkt Packet on which to operate. + * @return A pointer to start of the packet. + */ +static __inline unsigned char* +NETIO_PKT_L2_DATA_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) +{ + return (NETIO_PKT_CUSTOM_DATA_M(mda, pkt) + + NETIO_PKT_CUSTOM_HEADER_LENGTH_M(mda, pkt)); +} + + +/** Retrieve the length of the packet, starting with the L3 (generally, + * the IP) header. + * @ingroup ingress + * + * @param[in] mda Pointer to packet's standard metadata. + * @param[in] pkt Packet on which to operate. + * @return Length of the packet's L3 header and data, in bytes. + */ +static __inline netio_size_t +NETIO_PKT_L3_LENGTH_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) +{ + return (NETIO_PKT_L2_LENGTH_M(mda, pkt) - + NETIO_PKT_L2_HEADER_LENGTH_M(mda,pkt)); +} + + +/** Return a pointer to the packet's L3 (generally, the IP) header. + * @ingroup ingress + * + * Note that we guarantee word alignment of the L3 header. + * + * @param[in] mda Pointer to packet's standard metadata. + * @param[in] pkt Packet on which to operate. + * @return A pointer to the packet's L3 header. + */ +static __inline unsigned char* +NETIO_PKT_L3_DATA_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) +{ + return (NETIO_PKT_L2_DATA_M(mda, pkt) + + NETIO_PKT_L2_HEADER_LENGTH_M(mda, pkt)); +} + + +/** Return the ordinal of the packet. + * @ingroup ingress + * + * Each packet is given an ordinal number when it is delivered by the IPP. + * In the medium term, the ordinal is unique and monotonically increasing, + * being incremented by 1 for each packet; the ordinal of the first packet + * delivered after the IPP starts is zero. (Since the ordinal is of finite + * size, given enough input packets, it will eventually wrap around to zero; + * in the long term, therefore, ordinals are not unique.) The ordinals + * handed out by different IPPs are not disjoint, so two packets from + * different IPPs may have identical ordinals. Packets dropped by the + * IPP or by the I/O shim are not assigned ordinals. + * + * @param[in] mda Pointer to packet's standard metadata. + * @param[in] pkt Packet on which to operate. + * @return The packet's per-IPP packet ordinal. + */ +static __inline unsigned int +NETIO_PKT_ORDINAL_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) +{ + return mda->__packet_ordinal; +} + + +/** Return the per-group ordinal of the packet. + * @ingroup ingress + * + * Each packet is given a per-group ordinal number when it is + * delivered by the IPP. By default, the group is the packet's VLAN, + * although IPP can be recompiled to use different values. In + * the medium term, the ordinal is unique and monotonically + * increasing, being incremented by 1 for each packet; the ordinal of + * the first packet distributed to a particular group is zero. + * (Since the ordinal is of finite size, given enough input packets, + * it will eventually wrap around to zero; in the long term, + * therefore, ordinals are not unique.) The ordinals handed out by + * different IPPs are not disjoint, so two packets from different IPPs + * may have identical ordinals; similarly, packets distributed to + * different groups may have identical ordinals. Packets dropped by + * the IPP or by the I/O shim are not assigned ordinals. + * + * @param[in] mda Pointer to packet's standard metadata. + * @param[in] pkt Packet on which to operate. + * @return The packet's per-IPP, per-group ordinal. 
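+ *
+ * As an illustrative sketch (not part of the API proper; it assumes pkt
+ * is a valid ingress packet), the "_M" accessors are typically used by
+ * fetching the metadata pointer once and reusing it for several queries:
+ *
+ * @code
+ * netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt);
+ * unsigned int ordinal = NETIO_PKT_ORDINAL_M(mda, pkt);
+ * unsigned int group   = NETIO_PKT_GROUP_ORDINAL_M(mda, pkt);
+ * unsigned char* l3    = NETIO_PKT_L3_DATA_M(mda, pkt);
+ * @endcode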
+ */ +static __inline unsigned int +NETIO_PKT_GROUP_ORDINAL_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) +{ + return mda->__group_ordinal; +} + + +/** Return the VLAN ID assigned to the packet. + * @ingroup ingress + * + * This value is usually contained within the packet header. + * + * This value will be zero if the packet does not have a VLAN tag, or if + * this value was not extracted from the packet. + * + * @param[in] mda Pointer to packet's standard metadata. + * @param[in] pkt Packet on which to operate. + * @return The packet's VLAN ID. + */ +static __inline unsigned short +NETIO_PKT_VLAN_ID_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) +{ + int vl = (mda->__flags >> _NETIO_PKT_VLAN_SHIFT) & _NETIO_PKT_VLAN_RMASK; + unsigned short* pkt_p; + int index; + unsigned short val; + + if (vl == _NETIO_PKT_VLAN_NONE) + return 0; + + pkt_p = (unsigned short*) NETIO_PKT_L2_DATA_M(mda, pkt); + index = (mda->__flags >> _NETIO_PKT_TYPE_SHIFT) & _NETIO_PKT_TYPE_RMASK; + + val = pkt_p[(_netio_pkt_info[index] >> _NETIO_PKT_INFO_VLAN_SHIFT) & + _NETIO_PKT_INFO_VLAN_RMASK]; + +#ifdef __TILECC__ + return (__insn_bytex(val) >> 16) & 0xFFF; +#else + return (__builtin_bswap32(val) >> 16) & 0xFFF; +#endif +} + + +/** Return the ethertype of the packet. + * @ingroup ingress + * + * This value is usually contained within the packet header. + * + * This value is reliable if @ref NETIO_PKT_ETHERTYPE_RECOGNIZED_M() + * returns true, and otherwise, may not be well defined. + * + * @param[in] mda Pointer to packet's standard metadata. + * @param[in] pkt Packet on which to operate. + * @return The packet's ethertype. + */ +static __inline unsigned short +NETIO_PKT_ETHERTYPE_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) +{ + unsigned short* pkt_p = (unsigned short*) NETIO_PKT_L2_DATA_M(mda, pkt); + int index = (mda->__flags >> _NETIO_PKT_TYPE_SHIFT) & _NETIO_PKT_TYPE_RMASK; + + unsigned short val = + pkt_p[(_netio_pkt_info[index] >> _NETIO_PKT_INFO_ETYPE_SHIFT) & + _NETIO_PKT_INFO_ETYPE_RMASK]; + + return __builtin_bswap32(val) >> 16; +} + + +/** Return the flow hash computed on the packet. + * @ingroup ingress + * + * For TCP and UDP packets, this hash is calculated by hashing together + * the "5-tuple" values, specifically the source IP address, destination + * IP address, protocol type, source port and destination port. + * The hash value is intended to be helpful for millions of distinct + * flows. + * + * For IPv4 or IPv6 packets which are neither TCP nor UDP, the flow hash is + * derived by hashing together the source and destination IP addresses. + * + * For MPLS-encapsulated packets, the flow hash is derived by hashing + * the first MPLS label. + * + * For all other packets the flow hash is computed from the source + * and destination Ethernet addresses. + * + * The hash is symmetric, meaning it produces the same value if the + * source and destination are swapped. The only exceptions are + * tunneling protocols 0x04 (IP in IP Encapsulation), 0x29 (Simple + * Internet Protocol), 0x2F (General Routing Encapsulation) and 0x32 + * (Encap Security Payload), which use only the destination address + * since the source address is not meaningful. + * + * @param[in] mda Pointer to packet's standard metadata. + * @param[in] pkt Packet on which to operate. + * @return The packet's 32-bit flow hash. + */ +static __inline unsigned int +NETIO_PKT_FLOW_HASH_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) +{ + return mda->__flow_hash; +} + + +/** Return the first word of "user data" for the packet. 
+ * + * The contents of the user data words depend on the IPP. + * + * When using the standard ipp1, ipp2, or ipp4 sub-drivers, the first + * word of user data contains the least significant bits of the 64-bit + * arrival cycle count (see @c get_cycle_count_low()). + * + * See the System Programmer's Guide for details. + * + * @ingroup ingress + * + * @param[in] mda Pointer to packet's standard metadata. + * @param[in] pkt Packet on which to operate. + * @return The packet's first word of "user data". + */ +static __inline unsigned int +NETIO_PKT_USER_DATA_0_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) +{ + return mda->__user_data_0; +} + + +/** Return the second word of "user data" for the packet. + * + * The contents of the user data words depend on the IPP. + * + * When using the standard ipp1, ipp2, or ipp4 sub-drivers, the second + * word of user data contains the most significant bits of the 64-bit + * arrival cycle count (see @c get_cycle_count_high()). + * + * See the System Programmer's Guide for details. + * + * @ingroup ingress + * + * @param[in] mda Pointer to packet's standard metadata. + * @param[in] pkt Packet on which to operate. + * @return The packet's second word of "user data". + */ +static __inline unsigned int +NETIO_PKT_USER_DATA_1_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) +{ + return mda->__user_data_1; +} + + +/** Determine whether the L4 (TCP/UDP) checksum was calculated. + * @ingroup ingress + * + * @param[in] mda Pointer to packet's standard metadata. + * @param[in] pkt Packet on which to operate. + * @return Nonzero if the L4 checksum was calculated. + */ +static __inline unsigned int +NETIO_PKT_L4_CSUM_CALCULATED_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) +{ + return !(mda->__flags & _NETIO_PKT_NO_L4_CSUM_MASK); +} + + +/** Determine whether the L4 (TCP/UDP) checksum was calculated and found to + * be correct. + * @ingroup ingress + * + * @param[in] mda Pointer to packet's standard metadata. + * @param[in] pkt Packet on which to operate. + * @return Nonzero if the checksum was calculated and is correct. + */ +static __inline unsigned int +NETIO_PKT_L4_CSUM_CORRECT_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) +{ + return !(mda->__flags & + (_NETIO_PKT_BAD_L4_CSUM_MASK | _NETIO_PKT_NO_L4_CSUM_MASK)); +} + + +/** Determine whether the L3 (IP) checksum was calculated. + * @ingroup ingress + * + * @param[in] mda Pointer to packet's standard metadata. + * @param[in] pkt Packet on which to operate. + * @return Nonzero if the L3 (IP) checksum was calculated. +*/ +static __inline unsigned int +NETIO_PKT_L3_CSUM_CALCULATED_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) +{ + return !(mda->__flags & _NETIO_PKT_NO_L3_CSUM_MASK); +} + + +/** Determine whether the L3 (IP) checksum was calculated and found to be + * correct. + * @ingroup ingress + * + * @param[in] mda Pointer to packet's standard metadata. + * @param[in] pkt Packet on which to operate. + * @return Nonzero if the checksum was calculated and is correct. + */ +static __inline unsigned int +NETIO_PKT_L3_CSUM_CORRECT_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) +{ + return !(mda->__flags & + (_NETIO_PKT_BAD_L3_CSUM_MASK | _NETIO_PKT_NO_L3_CSUM_MASK)); +} + + +/** Determine whether the ethertype was recognized and L3 packet data was + * processed. + * @ingroup ingress + * + * @param[in] mda Pointer to packet's standard metadata. + * @param[in] pkt Packet on which to operate. + * @return Nonzero if the ethertype was recognized and L3 packet data was + * processed. 
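+ *
+ * A minimal sketch (drop_packet() is a hypothetical application routine,
+ * and pkt is assumed to be an ingress packet) combining this predicate
+ * with the checksum tests above before trusting the L4 payload:
+ *
+ * @code
+ * netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt);
+ * if (!NETIO_PKT_ETHERTYPE_RECOGNIZED_M(mda, pkt) ||
+ *     (NETIO_PKT_L4_CSUM_CALCULATED_M(mda, pkt) &&
+ *      !NETIO_PKT_L4_CSUM_CORRECT_M(mda, pkt)))
+ *   drop_packet(pkt);  // hypothetical application routine
+ * @endcode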
+ */ +static __inline unsigned int +NETIO_PKT_ETHERTYPE_RECOGNIZED_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) +{ + return !(mda->__flags & _NETIO_PKT_TYPE_UNRECOGNIZED_MASK); +} + + +/** Retrieve the status of a packet and any errors that may have occurred + * during ingress processing (length mismatches, CRC errors, etc.). + * @ingroup ingress + * + * Note that packets for which @ref NETIO_PKT_ETHERTYPE_RECOGNIZED() + * returns zero are always reported as underlength, as there is no a priori + * means to determine their length. Normally, applications should use + * @ref NETIO_PKT_BAD_M() instead of explicitly checking status with this + * function. + * + * @param[in] mda Pointer to packet's standard metadata. + * @param[in] pkt Packet on which to operate. + * @return The packet's status. + */ +static __inline netio_pkt_status_t +NETIO_PKT_STATUS_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) +{ + return (netio_pkt_status_t) __NETIO_PKT_NOTIF_HEADER(pkt).bits.__status; +} + + +/** Report whether a packet is bad (i.e., was shorter than expected based on + * its headers, or had a bad CRC). + * @ingroup ingress + * + * Note that this function does not verify L3 or L4 checksums. + * + * @param[in] mda Pointer to packet's standard metadata. + * @param[in] pkt Packet on which to operate. + * @return Nonzero if the packet is bad and should be discarded. + */ +static __inline unsigned int +NETIO_PKT_BAD_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) +{ + return ((NETIO_PKT_STATUS_M(mda, pkt) & 1) && + (NETIO_PKT_ETHERTYPE_RECOGNIZED_M(mda, pkt) || + NETIO_PKT_STATUS_M(mda, pkt) == NETIO_PKT_STATUS_BAD)); +} + + +/** Return the length of the packet, starting with the L2 (Ethernet) header. + * @ingroup egress + * + * @param[in] mmd Pointer to packet's minimal metadata. + * @param[in] pkt Packet on which to operate. + * @return The length of the packet, in bytes. + */ +static __inline netio_size_t +NETIO_PKT_L2_LENGTH_MM(netio_pkt_minimal_metadata_t* mmd, netio_pkt_t* pkt) +{ + return mmd->l2_length; +} + + +/** Return the length of the L2 (Ethernet) header. + * @ingroup egress + * + * @param[in] mmd Pointer to packet's minimal metadata. + * @param[in] pkt Packet on which to operate. + * @return The length of the packet's L2 header, in bytes. + */ +static __inline netio_size_t +NETIO_PKT_L2_HEADER_LENGTH_MM(netio_pkt_minimal_metadata_t* mmd, + netio_pkt_t* pkt) +{ + return mmd->l3_offset - mmd->l2_offset; +} + + +/** Return the length of the packet, starting with the L3 (IP) header. + * @ingroup egress + * + * @param[in] mmd Pointer to packet's minimal metadata. + * @param[in] pkt Packet on which to operate. + * @return Length of the packet's L3 header and data, in bytes. + */ +static __inline netio_size_t +NETIO_PKT_L3_LENGTH_MM(netio_pkt_minimal_metadata_t* mmd, netio_pkt_t* pkt) +{ + return (NETIO_PKT_L2_LENGTH_MM(mmd, pkt) - + NETIO_PKT_L2_HEADER_LENGTH_MM(mmd, pkt)); +} + + +/** Return a pointer to the packet's L3 (generally, the IP) header. + * @ingroup egress + * + * Note that we guarantee word alignment of the L3 header. + * + * @param[in] mmd Pointer to packet's minimal metadata. + * @param[in] pkt Packet on which to operate. + * @return A pointer to the packet's L3 header. + */ +static __inline unsigned char* +NETIO_PKT_L3_DATA_MM(netio_pkt_minimal_metadata_t* mmd, netio_pkt_t* pkt) +{ + return _NETIO_PKT_BASE(pkt) + mmd->l3_offset; +} + + +/** Return a pointer to the packet's L2 (Ethernet) header. + * @ingroup egress + * + * @param[in] mmd Pointer to packet's minimal metadata. 
+ * @param[in] pkt Packet on which to operate. + * @return A pointer to start of the packet. + */ +static __inline unsigned char* +NETIO_PKT_L2_DATA_MM(netio_pkt_minimal_metadata_t* mmd, netio_pkt_t* pkt) +{ + return _NETIO_PKT_BASE(pkt) + mmd->l2_offset; +} + + +/** Retrieve the status of a packet and any errors that may have occurred + * during ingress processing (length mismatches, CRC errors, etc.). + * @ingroup ingress + * + * Note that packets for which @ref NETIO_PKT_ETHERTYPE_RECOGNIZED() + * returns zero are always reported as underlength, as there is no a priori + * means to determine their length. Normally, applications should use + * @ref NETIO_PKT_BAD() instead of explicitly checking status with this + * function. + * + * @param[in] pkt Packet on which to operate. + * @return The packet's status. + */ +static __inline netio_pkt_status_t +NETIO_PKT_STATUS(netio_pkt_t* pkt) +{ + netio_assert(!pkt->__packet.bits.__minimal); + + return (netio_pkt_status_t) __NETIO_PKT_NOTIF_HEADER(pkt).bits.__status; +} + + +/** Report whether a packet is bad (i.e., was shorter than expected based on + * its headers, or had a bad CRC). + * @ingroup ingress + * + * Note that this function does not verify L3 or L4 checksums. + * + * @param[in] pkt Packet on which to operate. + * @return Nonzero if the packet is bad and should be discarded. + */ +static __inline unsigned int +NETIO_PKT_BAD(netio_pkt_t* pkt) +{ + netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); + + return NETIO_PKT_BAD_M(mda, pkt); +} + + +/** Return the length of the packet's custom header. + * A custom header may or may not be present, depending upon the IPP; its + * contents and alignment are also IPP-dependent. Currently, none of the + * standard IPPs supplied by Tilera produce a custom header. If present, + * the custom header precedes the L2 header in the packet buffer. + * @ingroup pktfuncs + * + * @param[in] pkt Packet on which to operate. + * @return The length of the packet's custom header, in bytes. + */ +static __inline netio_size_t +NETIO_PKT_CUSTOM_HEADER_LENGTH(netio_pkt_t* pkt) +{ + netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); + + return NETIO_PKT_CUSTOM_HEADER_LENGTH_M(mda, pkt); +} + + +/** Return the length of the packet, starting with the custom header. + * A custom header may or may not be present, depending upon the IPP; its + * contents and alignment are also IPP-dependent. Currently, none of the + * standard IPPs supplied by Tilera produce a custom header. If present, + * the custom header precedes the L2 header in the packet buffer. + * @ingroup pktfuncs + * + * @param[in] pkt Packet on which to operate. + * @return The length of the packet, in bytes. + */ +static __inline netio_size_t +NETIO_PKT_CUSTOM_LENGTH(netio_pkt_t* pkt) +{ + netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); + + return NETIO_PKT_CUSTOM_LENGTH_M(mda, pkt); +} + + +/** Return a pointer to the packet's custom header. + * A custom header may or may not be present, depending upon the IPP; its + * contents and alignment are also IPP-dependent. Currently, none of the + * standard IPPs supplied by Tilera produce a custom header. If present, + * the custom header precedes the L2 header in the packet buffer. + * @ingroup pktfuncs + * + * @param[in] pkt Packet on which to operate. + * @return A pointer to start of the packet. 
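+ *
+ * As a minimal sketch (assuming pkt is an ingress packet), the custom
+ * header, when present, immediately precedes the L2 header, so the
+ * convenience accessors are related as follows:
+ *
+ * @code
+ * unsigned char* custom = NETIO_PKT_CUSTOM_DATA(pkt);
+ * unsigned char* l2     = custom + NETIO_PKT_CUSTOM_HEADER_LENGTH(pkt);
+ * // l2 == NETIO_PKT_L2_DATA(pkt)
+ * @endcode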
+ */ +static __inline unsigned char* +NETIO_PKT_CUSTOM_DATA(netio_pkt_t* pkt) +{ + netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); + + return NETIO_PKT_CUSTOM_DATA_M(mda, pkt); +} + + +/** Return the length of the packet's L2 (Ethernet plus VLAN or SNAP) header. + * @ingroup pktfuncs + * + * @param[in] pkt Packet on which to operate. + * @return The length of the packet's L2 header, in bytes. + */ +static __inline netio_size_t +NETIO_PKT_L2_HEADER_LENGTH(netio_pkt_t* pkt) +{ + if (NETIO_PKT_IS_MINIMAL(pkt)) + { + netio_pkt_minimal_metadata_t* mmd = NETIO_PKT_MINIMAL_METADATA(pkt); + + return NETIO_PKT_L2_HEADER_LENGTH_MM(mmd, pkt); + } + else + { + netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); + + return NETIO_PKT_L2_HEADER_LENGTH_M(mda, pkt); + } +} + + +/** Return the length of the packet, starting with the L2 (Ethernet) header. + * @ingroup pktfuncs + * + * @param[in] pkt Packet on which to operate. + * @return The length of the packet, in bytes. + */ +static __inline netio_size_t +NETIO_PKT_L2_LENGTH(netio_pkt_t* pkt) +{ + if (NETIO_PKT_IS_MINIMAL(pkt)) + { + netio_pkt_minimal_metadata_t* mmd = NETIO_PKT_MINIMAL_METADATA(pkt); + + return NETIO_PKT_L2_LENGTH_MM(mmd, pkt); + } + else + { + netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); + + return NETIO_PKT_L2_LENGTH_M(mda, pkt); + } +} + + +/** Return a pointer to the packet's L2 (Ethernet) header. + * @ingroup pktfuncs + * + * @param[in] pkt Packet on which to operate. + * @return A pointer to start of the packet. + */ +static __inline unsigned char* +NETIO_PKT_L2_DATA(netio_pkt_t* pkt) +{ + if (NETIO_PKT_IS_MINIMAL(pkt)) + { + netio_pkt_minimal_metadata_t* mmd = NETIO_PKT_MINIMAL_METADATA(pkt); + + return NETIO_PKT_L2_DATA_MM(mmd, pkt); + } + else + { + netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); + + return NETIO_PKT_L2_DATA_M(mda, pkt); + } +} + + +/** Retrieve the length of the packet, starting with the L3 (generally, the IP) + * header. + * @ingroup pktfuncs + * + * @param[in] pkt Packet on which to operate. + * @return Length of the packet's L3 header and data, in bytes. + */ +static __inline netio_size_t +NETIO_PKT_L3_LENGTH(netio_pkt_t* pkt) +{ + if (NETIO_PKT_IS_MINIMAL(pkt)) + { + netio_pkt_minimal_metadata_t* mmd = NETIO_PKT_MINIMAL_METADATA(pkt); + + return NETIO_PKT_L3_LENGTH_MM(mmd, pkt); + } + else + { + netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); + + return NETIO_PKT_L3_LENGTH_M(mda, pkt); + } +} + + +/** Return a pointer to the packet's L3 (generally, the IP) header. + * @ingroup pktfuncs + * + * Note that we guarantee word alignment of the L3 header. + * + * @param[in] pkt Packet on which to operate. + * @return A pointer to the packet's L3 header. + */ +static __inline unsigned char* +NETIO_PKT_L3_DATA(netio_pkt_t* pkt) +{ + if (NETIO_PKT_IS_MINIMAL(pkt)) + { + netio_pkt_minimal_metadata_t* mmd = NETIO_PKT_MINIMAL_METADATA(pkt); + + return NETIO_PKT_L3_DATA_MM(mmd, pkt); + } + else + { + netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); + + return NETIO_PKT_L3_DATA_M(mda, pkt); + } +} + + +/** Return the ordinal of the packet. + * @ingroup ingress + * + * Each packet is given an ordinal number when it is delivered by the IPP. + * In the medium term, the ordinal is unique and monotonically increasing, + * being incremented by 1 for each packet; the ordinal of the first packet + * delivered after the IPP starts is zero. 
(Since the ordinal is of finite + * size, given enough input packets, it will eventually wrap around to zero; + * in the long term, therefore, ordinals are not unique.) The ordinals + * handed out by different IPPs are not disjoint, so two packets from + * different IPPs may have identical ordinals. Packets dropped by the + * IPP or by the I/O shim are not assigned ordinals. + * + * + * @param[in] pkt Packet on which to operate. + * @return The packet's per-IPP packet ordinal. + */ +static __inline unsigned int +NETIO_PKT_ORDINAL(netio_pkt_t* pkt) +{ + netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); + + return NETIO_PKT_ORDINAL_M(mda, pkt); +} + + +/** Return the per-group ordinal of the packet. + * @ingroup ingress + * + * Each packet is given a per-group ordinal number when it is + * delivered by the IPP. By default, the group is the packet's VLAN, + * although IPP can be recompiled to use different values. In + * the medium term, the ordinal is unique and monotonically + * increasing, being incremented by 1 for each packet; the ordinal of + * the first packet distributed to a particular group is zero. + * (Since the ordinal is of finite size, given enough input packets, + * it will eventually wrap around to zero; in the long term, + * therefore, ordinals are not unique.) The ordinals handed out by + * different IPPs are not disjoint, so two packets from different IPPs + * may have identical ordinals; similarly, packets distributed to + * different groups may have identical ordinals. Packets dropped by + * the IPP or by the I/O shim are not assigned ordinals. + * + * @param[in] pkt Packet on which to operate. + * @return The packet's per-IPP, per-group ordinal. + */ +static __inline unsigned int +NETIO_PKT_GROUP_ORDINAL(netio_pkt_t* pkt) +{ + netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); + + return NETIO_PKT_GROUP_ORDINAL_M(mda, pkt); +} + + +/** Return the VLAN ID assigned to the packet. + * @ingroup ingress + * + * This is usually also contained within the packet header. If the packet + * does not have a VLAN tag, the VLAN ID returned by this function is zero. + * + * @param[in] pkt Packet on which to operate. + * @return The packet's VLAN ID. + */ +static __inline unsigned short +NETIO_PKT_VLAN_ID(netio_pkt_t* pkt) +{ + netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); + + return NETIO_PKT_VLAN_ID_M(mda, pkt); +} + + +/** Return the ethertype of the packet. + * @ingroup ingress + * + * This value is reliable if @ref NETIO_PKT_ETHERTYPE_RECOGNIZED() + * returns true, and otherwise, may not be well defined. + * + * @param[in] pkt Packet on which to operate. + * @return The packet's ethertype. + */ +static __inline unsigned short +NETIO_PKT_ETHERTYPE(netio_pkt_t* pkt) +{ + netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); + + return NETIO_PKT_ETHERTYPE_M(mda, pkt); +} + + +/** Return the flow hash computed on the packet. + * @ingroup ingress + * + * For TCP and UDP packets, this hash is calculated by hashing together + * the "5-tuple" values, specifically the source IP address, destination + * IP address, protocol type, source port and destination port. + * The hash value is intended to be helpful for millions of distinct + * flows. + * + * For IPv4 or IPv6 packets which are neither TCP nor UDP, the flow hash is + * derived by hashing together the source and destination IP addresses. + * + * For MPLS-encapsulated packets, the flow hash is derived by hashing + * the first MPLS label. 
+ * + * For all other packets the flow hash is computed from the source + * and destination Ethernet addresses. + * + * The hash is symmetric, meaning it produces the same value if the + * source and destination are swapped. The only exceptions are + * tunneling protocols 0x04 (IP in IP Encapsulation), 0x29 (Simple + * Internet Protocol), 0x2F (General Routing Encapsulation) and 0x32 + * (Encap Security Payload), which use only the destination address + * since the source address is not meaningful. + * + * @param[in] pkt Packet on which to operate. + * @return The packet's 32-bit flow hash. + */ +static __inline unsigned int +NETIO_PKT_FLOW_HASH(netio_pkt_t* pkt) +{ + netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); + + return NETIO_PKT_FLOW_HASH_M(mda, pkt); +} + + +/** Return the first word of "user data" for the packet. + * + * The contents of the user data words depend on the IPP. + * + * When using the standard ipp1, ipp2, or ipp4 sub-drivers, the first + * word of user data contains the least significant bits of the 64-bit + * arrival cycle count (see @c get_cycle_count_low()). + * + * See the System Programmer's Guide for details. + * + * @ingroup ingress + * + * @param[in] pkt Packet on which to operate. + * @return The packet's first word of "user data". + */ +static __inline unsigned int +NETIO_PKT_USER_DATA_0(netio_pkt_t* pkt) +{ + netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); + + return NETIO_PKT_USER_DATA_0_M(mda, pkt); +} + + +/** Return the second word of "user data" for the packet. + * + * The contents of the user data words depend on the IPP. + * + * When using the standard ipp1, ipp2, or ipp4 sub-drivers, the second + * word of user data contains the most significant bits of the 64-bit + * arrival cycle count (see @c get_cycle_count_high()). + * + * See the System Programmer's Guide for details. + * + * @ingroup ingress + * + * @param[in] pkt Packet on which to operate. + * @return The packet's second word of "user data". + */ +static __inline unsigned int +NETIO_PKT_USER_DATA_1(netio_pkt_t* pkt) +{ + netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); + + return NETIO_PKT_USER_DATA_1_M(mda, pkt); +} + + +/** Determine whether the L4 (TCP/UDP) checksum was calculated. + * @ingroup ingress + * + * @param[in] pkt Packet on which to operate. + * @return Nonzero if the L4 checksum was calculated. + */ +static __inline unsigned int +NETIO_PKT_L4_CSUM_CALCULATED(netio_pkt_t* pkt) +{ + netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); + + return NETIO_PKT_L4_CSUM_CALCULATED_M(mda, pkt); +} + + +/** Determine whether the L4 (TCP/UDP) checksum was calculated and found to + * be correct. + * @ingroup ingress + * + * @param[in] pkt Packet on which to operate. + * @return Nonzero if the checksum was calculated and is correct. + */ +static __inline unsigned int +NETIO_PKT_L4_CSUM_CORRECT(netio_pkt_t* pkt) +{ + netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); + + return NETIO_PKT_L4_CSUM_CORRECT_M(mda, pkt); +} + + +/** Determine whether the L3 (IP) checksum was calculated. + * @ingroup ingress + * + * @param[in] pkt Packet on which to operate. + * @return Nonzero if the L3 (IP) checksum was calculated. +*/ +static __inline unsigned int +NETIO_PKT_L3_CSUM_CALCULATED(netio_pkt_t* pkt) +{ + netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); + + return NETIO_PKT_L3_CSUM_CALCULATED_M(mda, pkt); +} + + +/** Determine whether the L3 (IP) checksum was calculated and found to be + * correct. + * @ingroup ingress + * + * @param[in] pkt Packet on which to operate. 
+ * @return Nonzero if the checksum was calculated and is correct. + */ +static __inline unsigned int +NETIO_PKT_L3_CSUM_CORRECT(netio_pkt_t* pkt) +{ + netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); + + return NETIO_PKT_L3_CSUM_CORRECT_M(mda, pkt); +} + + +/** Determine whether the Ethertype was recognized and L3 packet data was + * processed. + * @ingroup ingress + * + * @param[in] pkt Packet on which to operate. + * @return Nonzero if the Ethertype was recognized and L3 packet data was + * processed. + */ +static __inline unsigned int +NETIO_PKT_ETHERTYPE_RECOGNIZED(netio_pkt_t* pkt) +{ + netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); + + return NETIO_PKT_ETHERTYPE_RECOGNIZED_M(mda, pkt); +} + + +/** Set an egress packet's L2 length, using a metadata pointer to speed the + * computation. + * @ingroup egress + * + * @param[in,out] mmd Pointer to packet's minimal metadata. + * @param[in] pkt Packet on which to operate. + * @param[in] len Packet L2 length, in bytes. + */ +static __inline void +NETIO_PKT_SET_L2_LENGTH_MM(netio_pkt_minimal_metadata_t* mmd, netio_pkt_t* pkt, + int len) +{ + mmd->l2_length = len; +} + + +/** Set an egress packet's L2 length. + * @ingroup egress + * + * @param[in,out] pkt Packet on which to operate. + * @param[in] len Packet L2 length, in bytes. + */ +static __inline void +NETIO_PKT_SET_L2_LENGTH(netio_pkt_t* pkt, int len) +{ + netio_pkt_minimal_metadata_t* mmd = NETIO_PKT_MINIMAL_METADATA(pkt); + + NETIO_PKT_SET_L2_LENGTH_MM(mmd, pkt, len); +} + + +/** Set an egress packet's L2 header length, using a metadata pointer to + * speed the computation. + * @ingroup egress + * + * It is not normally necessary to call this routine; only the L2 length, + * not the header length, is needed to transmit a packet. It may be useful if + * the egress packet will later be processed by code which expects to use + * functions like @ref NETIO_PKT_L3_DATA() to get a pointer to the L3 payload. + * + * @param[in,out] mmd Pointer to packet's minimal metadata. + * @param[in] pkt Packet on which to operate. + * @param[in] len Packet L2 header length, in bytes. + */ +static __inline void +NETIO_PKT_SET_L2_HEADER_LENGTH_MM(netio_pkt_minimal_metadata_t* mmd, + netio_pkt_t* pkt, int len) +{ + mmd->l3_offset = mmd->l2_offset + len; +} + + +/** Set an egress packet's L2 header length. + * @ingroup egress + * + * It is not normally necessary to call this routine; only the L2 length, + * not the header length, is needed to transmit a packet. It may be useful if + * the egress packet will later be processed by code which expects to use + * functions like @ref NETIO_PKT_L3_DATA() to get a pointer to the L3 payload. + * + * @param[in,out] pkt Packet on which to operate. + * @param[in] len Packet L2 header length, in bytes. + */ +static __inline void +NETIO_PKT_SET_L2_HEADER_LENGTH(netio_pkt_t* pkt, int len) +{ + netio_pkt_minimal_metadata_t* mmd = NETIO_PKT_MINIMAL_METADATA(pkt); + + NETIO_PKT_SET_L2_HEADER_LENGTH_MM(mmd, pkt, len); +} + + +/** Set up an egress packet for hardware checksum computation, using a + * metadata pointer to speed the operation. + * @ingroup egress + * + * NetIO provides the ability to automatically calculate a standard + * 16-bit Internet checksum on transmitted packets. The application + * may specify the point in the packet where the checksum starts, the + * number of bytes to be checksummed, and the two bytes in the packet + * which will be replaced with the completed checksum. 
(If the range
+ * of bytes to be checksummed includes the bytes to be replaced, the
+ * initial values of those bytes will be included in the checksum.)
+ *
+ * For some protocols, the packet checksum covers data which is not present
+ * in the packet, or is at least not contiguous to the main data payload.
+ * For instance, the TCP checksum includes a "pseudo-header" which includes
+ * the source and destination IP addresses of the packet. To accommodate
+ * this, the checksum engine may be "seeded" with an initial value, which
+ * the application would need to compute based on the specific protocol's
+ * requirements. Note that the seed is given in host byte order (little-
+ * endian), not network byte order (big-endian); code written to compute a
+ * pseudo-header checksum in network byte order will need to byte-swap it
+ * before use as the seed.
+ *
+ * Note that the checksum is computed as part of the transmission process,
+ * so it will not be present in the packet upon completion of this routine.
+ *
+ * @param[in,out] mmd Pointer to packet's minimal metadata.
+ * @param[in] pkt Packet on which to operate.
+ * @param[in] start Offset within L2 packet of the first byte to include in
+ *   the checksum.
+ * @param[in] length Number of bytes to include in the checksum.
+ * @param[in] location Offset within L2 packet of the first of the two bytes
+ *   to be replaced with the calculated checksum.
+ * @param[in] seed Initial value of the running checksum before any of the
+ *   packet data is added.
+ */
+static __inline void
+NETIO_PKT_DO_EGRESS_CSUM_MM(netio_pkt_minimal_metadata_t* mmd,
+                            netio_pkt_t* pkt, int start, int length,
+                            int location, uint16_t seed)
+{
+  mmd->csum_start = start;
+  mmd->csum_length = length;
+  mmd->csum_location = location;
+  mmd->csum_seed = seed;
+  mmd->flags |= _NETIO_PKT_NEED_EDMA_CSUM_MASK;
+}
+
+
+/** Set up an egress packet for hardware checksum computation.
+ * @ingroup egress
+ *
+ * NetIO provides the ability to automatically calculate a standard
+ * 16-bit Internet checksum on transmitted packets. The application
+ * may specify the point in the packet where the checksum starts, the
+ * number of bytes to be checksummed, and the two bytes in the packet
+ * which will be replaced with the completed checksum. (If the range
+ * of bytes to be checksummed includes the bytes to be replaced, the
+ * initial values of those bytes will be included in the checksum.)
+ *
+ * For some protocols, the packet checksum covers data which is not present
+ * in the packet, or is at least not contiguous to the main data payload.
+ * For instance, the TCP checksum includes a "pseudo-header" which includes
+ * the source and destination IP addresses of the packet. To accommodate
+ * this, the checksum engine may be "seeded" with an initial value, which
+ * the application would need to compute based on the specific protocol's
+ * requirements. Note that the seed is given in host byte order (little-
+ * endian), not network byte order (big-endian); code written to compute a
+ * pseudo-header checksum in network byte order will need to byte-swap it
+ * before use as the seed.
+ *
+ * Note that the checksum is computed as part of the transmission process,
+ * so it will not be present in the packet upon completion of this routine.
+ *
+ * @param[in,out] pkt Packet on which to operate.
+ * @param[in] start Offset within L2 packet of the first byte to include in
+ *   the checksum.
+ * @param[in] length Number of bytes to include in the checksum.
+ * @param[in] location Offset within L2 packet of the first of the two bytes
+ *   to be replaced with the calculated checksum.
+ * @param[in] seed Initial value of the running checksum before any of the
+ *   packet data is added.
+ */
+static __inline void
+NETIO_PKT_DO_EGRESS_CSUM(netio_pkt_t* pkt, int start, int length,
+                         int location, uint16_t seed)
+{
+  netio_pkt_minimal_metadata_t* mmd = NETIO_PKT_MINIMAL_METADATA(pkt);
+
+  NETIO_PKT_DO_EGRESS_CSUM_MM(mmd, pkt, start, length, location, seed);
+}
+
+
+/** Return the number of bytes which could be prepended to a packet, using a
+ * metadata pointer to speed the operation.
+ * See @ref netio_populate_prepend_buffer() to get a full description of
+ * prepending.
+ *
+ * @param[in] mda Pointer to packet's standard metadata.
+ * @param[in] pkt Packet on which to operate.
+ * @return The number of bytes which could be prepended to the packet.
+ */
+static __inline int
+NETIO_PKT_PREPEND_AVAIL_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt)
+{
+  return (pkt->__packet.bits.__offset << 6) +
+         NETIO_PKT_CUSTOM_HEADER_LENGTH_M(mda, pkt);
+}
+
+
+/** Return the number of bytes which could be prepended to a packet, using a
+ * metadata pointer to speed the operation.
+ * See @ref netio_populate_prepend_buffer() to get a full description of
+ * prepending.
+ * @ingroup egress
+ *
+ * @param[in] mmd Pointer to packet's minimal metadata.
+ * @param[in] pkt Packet on which to operate.
+ * @return The number of bytes which could be prepended to the packet.
+ */
+static __inline int
+NETIO_PKT_PREPEND_AVAIL_MM(netio_pkt_minimal_metadata_t* mmd, netio_pkt_t* pkt)
+{
+  return (pkt->__packet.bits.__offset << 6) + mmd->l2_offset;
+}
+
+
+/** Return the number of bytes which could be prepended to a packet.
+ * See @ref netio_populate_prepend_buffer() to get a full description of
+ * prepending.
+ * @ingroup egress
+ *
+ * @param[in] pkt Packet on which to operate.
+ * @return The number of bytes which could be prepended to the packet.
+ */
+static __inline int
+NETIO_PKT_PREPEND_AVAIL(netio_pkt_t* pkt)
+{
+  if (NETIO_PKT_IS_MINIMAL(pkt))
+  {
+    netio_pkt_minimal_metadata_t* mmd = NETIO_PKT_MINIMAL_METADATA(pkt);
+
+    return NETIO_PKT_PREPEND_AVAIL_MM(mmd, pkt);
+  }
+  else
+  {
+    netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt);
+
+    return NETIO_PKT_PREPEND_AVAIL_M(mda, pkt);
+  }
+}
+
+
+/** Flush a packet's minimal metadata from the cache, using a metadata pointer
+ * to speed the operation.
+ * @ingroup egress
+ *
+ * @param[in] mmd Pointer to packet's minimal metadata.
+ * @param[in] pkt Packet on which to operate.
+ */
+static __inline void
+NETIO_PKT_FLUSH_MINIMAL_METADATA_MM(netio_pkt_minimal_metadata_t* mmd,
+                                    netio_pkt_t* pkt)
+{
+}
+
+
+/** Invalidate a packet's minimal metadata from the cache, using a metadata
+ * pointer to speed the operation.
+ * @ingroup egress
+ *
+ * @param[in] mmd Pointer to packet's minimal metadata.
+ * @param[in] pkt Packet on which to operate.
+ */
+static __inline void
+NETIO_PKT_INV_MINIMAL_METADATA_MM(netio_pkt_minimal_metadata_t* mmd,
+                                  netio_pkt_t* pkt)
+{
+}
+
+
+/** Flush and then invalidate a packet's minimal metadata from the cache,
+ * using a metadata pointer to speed the operation.
+ * @ingroup egress
+ *
+ * @param[in] mmd Pointer to packet's minimal metadata.
+ * @param[in] pkt Packet on which to operate.
+ */
+static __inline void
+NETIO_PKT_FLUSH_INV_MINIMAL_METADATA_MM(netio_pkt_minimal_metadata_t* mmd,
+                                        netio_pkt_t* pkt)
+{
+}
+
+
+/** Flush a packet's metadata from the cache, using a metadata pointer
+ * to speed the operation.
+ * @ingroup ingress
+ *
+ * @param[in] mda Pointer to packet's metadata.
+ * @param[in] pkt Packet on which to operate.
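+ *
+ * An illustrative sketch (send_pkt_to_tile() is a hypothetical
+ * application transport routine; whether a flush is actually required
+ * depends on the memory coherency rules described in @ref input):
+ *
+ * @code
+ * netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt);
+ * NETIO_PKT_FLUSH_METADATA_M(mda, pkt);  // push cached metadata to memory
+ * send_pkt_to_tile(pkt);                 // hypothetical hand-off
+ * @endcode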
+ */ +static __inline void +NETIO_PKT_FLUSH_METADATA_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) +{ +} + + +/** Invalidate a packet's metadata from the cache, using a metadata + * pointer to speed the operation. + * @ingroup ingress + * + * @param[in] mda Pointer to packet's metadata. + * @param[in] pkt Packet on which to operate. + */ +static __inline void +NETIO_PKT_INV_METADATA_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) +{ +} + + +/** Flush and then invalidate a packet's metadata from the cache, + * using a metadata pointer to speed the operation. + * @ingroup ingress + * + * @param[in] mda Pointer to packet's metadata. + * @param[in] pkt Packet on which to operate. + */ +static __inline void +NETIO_PKT_FLUSH_INV_METADATA_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) +{ +} + + +/** Flush a packet's minimal metadata from the cache. + * @ingroup egress + * + * @param[in] pkt Packet on which to operate. + */ +static __inline void +NETIO_PKT_FLUSH_MINIMAL_METADATA(netio_pkt_t* pkt) +{ +} + + +/** Invalidate a packet's minimal metadata from the cache. + * @ingroup egress + * + * @param[in] pkt Packet on which to operate. + */ +static __inline void +NETIO_PKT_INV_MINIMAL_METADATA(netio_pkt_t* pkt) +{ +} + + +/** Flush and then invalidate a packet's minimal metadata from the cache. + * @ingroup egress + * + * @param[in] pkt Packet on which to operate. + */ +static __inline void +NETIO_PKT_FLUSH_INV_MINIMAL_METADATA(netio_pkt_t* pkt) +{ +} + + +/** Flush a packet's metadata from the cache. + * @ingroup ingress + * + * @param[in] pkt Packet on which to operate. + */ +static __inline void +NETIO_PKT_FLUSH_METADATA(netio_pkt_t* pkt) +{ +} + + +/** Invalidate a packet's metadata from the cache. + * @ingroup ingress + * + * @param[in] pkt Packet on which to operate. + */ +static __inline void +NETIO_PKT_INV_METADATA(netio_pkt_t* pkt) +{ +} + + +/** Flush and then invalidate a packet's metadata from the cache. + * @ingroup ingress + * + * @param[in] pkt Packet on which to operate. + */ +static __inline void +NETIO_PKT_FLUSH_INV_METADATA(netio_pkt_t* pkt) +{ +} + +/** Number of NUMA nodes we can distribute buffers to. + * @ingroup setup */ +#define NETIO_NUM_NODE_WEIGHTS 16 + +/** + * @brief An object for specifying the characteristics of NetIO communication + * endpoint. + * + * @ingroup setup + * + * The @ref netio_input_register() function uses this structure to define + * how an application tile will communicate with an IPP. + * + * + * Future updates to NetIO may add new members to this structure, + * which can affect the success of the registration operation. Thus, + * if dynamically initializing the structure, applications are urged to + * zero it out first, for example: + * + * @code + * netio_input_config_t config; + * memset(&config, 0, sizeof (config)); + * config.flags = NETIO_RECV | NETIO_XMIT_CSUM | NETIO_TAG_NONE; + * config.num_receive_packets = NETIO_MAX_RECEIVE_PKTS; + * config.queue_id = 0; + * . + * . + * . + * @endcode + * + * since that guarantees that any unused structure members, including + * members which did not exist when the application was first developed, + * will not have unexpected values. 
+ *
+ * If statically initializing the structure, we strongly recommend use of
+ * C99-style named initializers, for example:
+ *
+ * @code
+ * netio_input_config_t config = {
+ *   .flags = NETIO_RECV | NETIO_XMIT_CSUM | NETIO_TAG_NONE,
+ *   .num_receive_packets = NETIO_MAX_RECEIVE_PKTS,
+ *   .queue_id = 0,
+ * };
+ * @endcode
+ *
+ * instead of the old-style structure initialization:
+ *
+ * @code
+ * // Bad example! Currently equivalent to the above, but don't do this.
+ * netio_input_config_t config = {
+ *   NETIO_RECV | NETIO_XMIT_CSUM | NETIO_TAG_NONE, NETIO_MAX_RECEIVE_PKTS, 0
+ * };
+ * @endcode
+ *
+ * since the C99 style requires no changes to the code if elements of the
+ * config structure are rearranged. (It also makes the initialization much
+ * easier to understand.)
+ *
+ * Except for items which address a particular tile's transmit or receive
+ * characteristics, such as the ::NETIO_RECV flag, applications are advised
+ * to specify the same set of configuration data on all registrations.
+ * This prevents differing results if multiple tiles happen to do their
+ * registration operations in a different order on different invocations of
+ * the application. This is particularly important for things like link
+ * management flags, and buffer size and homing specifications.
+ *
+ * Unless the ::NETIO_FIXED_BUFFER_VA flag is specified in flags, the NetIO
+ * buffer pool is automatically created and mapped into the application's
+ * virtual address space at an address chosen by the operating system,
+ * using the common memory (cmem) facility in the Tilera Multicore
+ * Components library. The cmem facility allows multiple processes to gain
+ * access to shared memory which is mapped into each process at an
+ * identical virtual address. In order for this to work, the processes
+ * must have a common ancestor, which must create the common memory using
+ * tmc_cmem_init().
+ *
+ * In programs using the iLib process creation API, or in programs which use
+ * only one process (which include programs using the pthreads library),
+ * tmc_cmem_init() is called automatically. All other applications
+ * must call it explicitly, before any child processes which might call
+ * netio_input_register() are created.
+ */
+typedef struct
+{
+  /** Registration characteristics.
+
+      This value determines several characteristics of the registration;
+      flags for different types of behavior are ORed together to make the
+      final flag value. Generally applications should specify exactly
+      one flag from each of the following categories:
+
+      - Whether the application will be receiving packets on this queue
+        (::NETIO_RECV or ::NETIO_NO_RECV).
+
+      - Whether the application will be transmitting packets on this queue,
+        and if so, whether it will request egress checksum calculation
+        (::NETIO_XMIT, ::NETIO_XMIT_CSUM, or ::NETIO_NO_XMIT). It is
+        legal to call netio_get_buffer() without one of the XMIT flags,
+        as long as ::NETIO_RECV is specified; in this case, the retrieved
+        buffers must be passed to another tile for transmission.
+
+      - Whether the application expects any vendor-specific tags in
+        its packets' L2 headers (::NETIO_TAG_NONE, ::NETIO_TAG_BRCM,
+        or ::NETIO_TAG_MRVL). This must match the configuration of the
+        target IPP.
+
+      To accommodate applications written to previous versions of the NetIO
+      interface, none of the flags above are currently required; if omitted,
+      NetIO behaves more or less as if ::NETIO_RECV | ::NETIO_XMIT_CSUM |
+      ::NETIO_TAG_NONE were used.
However, explicit specification of + the relevant flags allows NetIO to do a better job of resource + allocation, allows earlier detection of certain configuration errors, + and may enable advanced features or higher performance in the future, + so their use is strongly recommended. + + Note that specifying ::NETIO_NO_RECV along with ::NETIO_NO_XMIT + is a special case, intended primarily for use by programs which + retrieve network statistics or do link management operations. + When these flags are both specified, the resulting queue may not + be used with NetIO routines other than netio_get(), netio_set(), + and netio_input_unregister(). See @ref link for more information + on link management. + + Other flags are optional; their use is described below. + */ + int flags; + + /** Interface name. This is a string which identifies the specific + Ethernet controller hardware to be used. The format of the string + is a device type and a device index, separated by a slash; so, + the first 10 Gigabit Ethernet controller is named "xgbe/0", while + the second 10/100/1000 Megabit Ethernet controller is named "gbe/1". + */ + const char* interface; + + /** Receive packet queue size. This specifies the maximum number + of ingress packets that can be received on this queue without + being retrieved by @ref netio_get_packet(). If the IPP's distribution + algorithm calls for a packet to be sent to this queue, and this + number of packets are already pending there, the new packet + will either be discarded, or sent to another tile registered + for the same queue_id (see @ref drops). This value must + be at least ::NETIO_MIN_RECEIVE_PKTS, can always be at least + ::NETIO_MAX_RECEIVE_PKTS, and may be larger than that on certain + interfaces. + */ + int num_receive_packets; + + /** The queue ID being requested. Legal values for this range from 0 + to ::NETIO_MAX_QUEUE_ID, inclusive. ::NETIO_MAX_QUEUE_ID is always + greater than or equal to the number of tiles; this allows one queue + for each tile, plus at least one additional queue. Some applications + may wish to use the additional queue as a destination for unwanted + packets, since packets delivered to queues for which no tiles have + registered are discarded. + */ + unsigned int queue_id; + + /** Maximum number of small send buffers to be held in the local empty + buffer cache. This specifies the size of the area which holds + empty small egress buffers requested from the IPP but not yet + retrieved via @ref netio_get_buffer(). This value must be greater + than zero if the application will ever use @ref netio_get_buffer() + to allocate empty small egress buffers; it may be no larger than + ::NETIO_MAX_SEND_BUFFERS. See @ref epp for more details on empty + buffer caching. + */ + int num_send_buffers_small_total; + + /** Number of small send buffers to be preallocated at registration. + If this value is nonzero, the specified number of empty small egress + buffers will be requested from the IPP during the netio_input_register + operation; this may speed the execution of @ref netio_get_buffer(). + This may be no larger than @ref num_send_buffers_small_total. See @ref + epp for more details on empty buffer caching. + */ + int num_send_buffers_small_prealloc; + + /** Maximum number of large send buffers to be held in the local empty + buffer cache. This specifies the size of the area which holds empty + large egress buffers requested from the IPP but not yet retrieved via + @ref netio_get_buffer(). 
This value must be greater than zero if the + application will ever use @ref netio_get_buffer() to allocate empty + large egress buffers; it may be no larger than ::NETIO_MAX_SEND_BUFFERS. + See @ref epp for more details on empty buffer caching. + */ + int num_send_buffers_large_total; + + /** Number of large send buffers to be preallocated at registration. + If this value is nonzero, the specified number of empty large egress + buffers will be requested from the IPP during the netio_input_register + operation; this may speed the execution of @ref netio_get_buffer(). + This may be no larger than @ref num_send_buffers_large_total. See @ref + epp for more details on empty buffer caching. + */ + int num_send_buffers_large_prealloc; + + /** Maximum number of jumbo send buffers to be held in the local empty + buffer cache. This specifies the size of the area which holds empty + jumbo egress buffers requested from the IPP but not yet retrieved via + @ref netio_get_buffer(). This value must be greater than zero if the + application will ever use @ref netio_get_buffer() to allocate empty + jumbo egress buffers; it may be no larger than ::NETIO_MAX_SEND_BUFFERS. + See @ref epp for more details on empty buffer caching. + */ + int num_send_buffers_jumbo_total; + + /** Number of jumbo send buffers to be preallocated at registration. + If this value is nonzero, the specified number of empty jumbo egress + buffers will be requested from the IPP during the netio_input_register + operation; this may speed the execution of @ref netio_get_buffer(). + This may be no larger than @ref num_send_buffers_jumbo_total. See @ref + epp for more details on empty buffer caching. + */ + int num_send_buffers_jumbo_prealloc; + + /** Total packet buffer size. This determines the total size, in bytes, + of the NetIO buffer pool. Note that the maximum number of available + buffers of each size is determined during hypervisor configuration + (see the System Programmer's Guide for details); this just + influences how much host memory is allocated for those buffers. + + The buffer pool is allocated from common memory, which will be + automatically initialized if needed. If your buffer pool is larger + than 240 MB, you might need to explicitly call @c tmc_cmem_init(), + as described in the Application Libraries Reference Manual (UG227). + + Packet buffers are currently allocated in chunks of 16 MB; this + value will be rounded up to the next larger multiple of 16 MB. + If this value is zero, a default of 32 MB will be used; this was + the value used by previous versions of NetIO. Note that taking this + default also affects the placement of buffers on Linux NUMA nodes. + See @ref buffer_node_weights for an explanation of buffer placement. + + In order to successfully allocate packet buffers, Linux must have + available huge pages on the relevant Linux NUMA nodes. See the + System Programmer's Guide for information on configuring + huge page support in Linux. + */ + uint64_t total_buffer_size; + + /** Buffer placement weighting factors. + + This array specifies the relative amount of buffering to place + on each of the available Linux NUMA nodes. This array is + indexed by the NUMA node, and the values in the array are + proportional to the amount of buffer space to allocate on that + node. + + If memory striping is enabled in the Hypervisor, then there is + only one logical NUMA node (node 0). 
In that case, NetIO will by
+      default ignore the suggested buffer node weights, and buffers
+      will be striped across the physical memory controllers. See
+      UG209 System Programmer's Guide for a description of the
+      hypervisor option that controls memory striping.
+
+      If memory striping is disabled, then there are up to four NUMA
+      nodes, corresponding to the four DDRAM controllers in the TILE
+      processor architecture. See UG100 Tile Processor Architecture
+      Overview for a diagram showing the location of each of the DDRAM
+      controllers relative to the tile array.
+
+      For instance, if memory striping is disabled, the following
+      configuration structure:
+
+      @code
+      netio_input_config_t config = {
+          .
+          .
+          .
+          .total_buffer_size = 4 * 16 * 1024 * 1024,
+          .buffer_node_weights = { 1, 0, 1, 0 },
+      };
+      @endcode
+
+      would result in 32 MB of buffers being placed on controller 0, and
+      32 MB on controller 2. (Since buffers are allocated in units of
+      16 MB, some sets of weights cannot be matched exactly.)
+
+      For the weights to be effective, @ref total_buffer_size must be
+      nonzero. If @ref total_buffer_size is zero, causing the default
+      32 MB of buffer space to be used, then any specified weights will
+      be ignored, and buffers will be positioned as they were in previous
+      versions of NetIO:
+
+      - For xgbe/0 and gbe/0, 16 MB of buffers will be placed on controller 1,
+        and the other 16 MB will be placed on controller 2.
+
+      - For xgbe/1 and gbe/1, 16 MB of buffers will be placed on controller 2,
+        and the other 16 MB will be placed on controller 3.
+
+      If @ref total_buffer_size is nonzero, but all weights are zero,
+      then all buffer space will be allocated on Linux NUMA node zero.
+
+      By default, the specified buffer placement is treated as a hint;
+      if sufficient free memory is not available on the specified
+      controllers, the buffers will be allocated elsewhere. However,
+      if the ::NETIO_STRICT_HOMING flag is specified in @ref flags, then a
+      failure to allocate buffer space exactly as requested will cause the
+      registration operation to fail with an error of ::NETIO_CANNOT_HOME.
+
+      Note that maximal network performance cannot be achieved with
+      only one memory controller.
+  */
+  uint8_t buffer_node_weights[NETIO_NUM_NODE_WEIGHTS];
+
+  /** Fixed virtual address for packet buffers. Only valid when
+      ::NETIO_FIXED_BUFFER_VA is specified in @ref flags; see the
+      description of that flag for details.
+  */
+  void* fixed_buffer_va;
+
+  /**
+      Maximum number of outstanding send packet requests. This value is
+      only relevant when an EPP is in use; it determines the number of
+      slots in the EPP's outgoing packet queue which this tile is allowed
+      to consume, and thus the number of packets which may be sent before
+      the sending tile must wait for an acknowledgment from the EPP.
+      Modifying this value is generally only helpful when using @ref
+      netio_send_packet_vector(), where it can help improve performance by
+      allowing a single vector send operation to process more packets.
+      Typically it is not specified, and the default, which divides the
+      outgoing packet slots evenly between all tiles on the chip, is used.
+
+      If a registration asks for more outgoing packet queue slots than are
+      available, ::NETIO_TOOMANY_XMIT will be returned. The total number
+      of packet queue slots which are available for all tiles for each EPP
+      is subject to change, but is currently ::NETIO_TOTAL_SENDS_OUTSTANDING.
+
+
+      This value is ignored if ::NETIO_XMIT is not specified in flags.
+ If you want to specify a large value here for a specific tile, you are + advised to specify NETIO_NO_XMIT on other, non-transmitting tiles so + that they do not consume a default number of packet slots. Any tile + transmitting is required to have at least ::NETIO_MIN_SENDS_OUTSTANDING + slots allocated to it; values less than that will be silently + increased by the NetIO library. + */ + int num_sends_outstanding; +} +netio_input_config_t; + + +/** Registration flags; used in the @ref netio_input_config_t structure. + * @addtogroup setup + */ +/** @{ */ + +/** Fail a registration request if we can't put packet buffers + on the specified memory controllers. */ +#define NETIO_STRICT_HOMING 0x00000002 + +/** This application expects no tags on its L2 headers. */ +#define NETIO_TAG_NONE 0x00000004 + +/** This application expects Marvell extended tags on its L2 headers. */ +#define NETIO_TAG_MRVL 0x00000008 + +/** This application expects Broadcom tags on its L2 headers. */ +#define NETIO_TAG_BRCM 0x00000010 + +/** This registration may call routines which receive packets. */ +#define NETIO_RECV 0x00000020 + +/** This registration may not call routines which receive packets. */ +#define NETIO_NO_RECV 0x00000040 + +/** This registration may call routines which transmit packets. */ +#define NETIO_XMIT 0x00000080 + +/** This registration may call routines which transmit packets with + checksum acceleration. */ +#define NETIO_XMIT_CSUM 0x00000100 + +/** This registration may not call routines which transmit packets. */ +#define NETIO_NO_XMIT 0x00000200 + +/** This registration wants NetIO buffers mapped at an application-specified + virtual address. + + NetIO buffers are by default created by the TMC common memory facility, + which must be configured by a common ancestor of all processes sharing + a network interface. When this flag is specified, NetIO buffers are + instead mapped at an address chosen by the application (and specified + in @ref netio_input_config_t::fixed_buffer_va). This allows multiple + unrelated but cooperating processes to share a NetIO interface. + All processes sharing the same interface must specify this flag, + and all must specify the same fixed virtual address. + + @ref netio_input_config_t::fixed_buffer_va must be a + multiple of 16 MB, and the packet buffers will occupy @ref + netio_input_config_t::total_buffer_size bytes of virtual address + space, beginning at that address. If any of those virtual addresses + are currently occupied by other memory objects, like application or + shared library code or data, @ref netio_input_register() will return + ::NETIO_FAULT. While it is impossible to provide a fixed_buffer_va + which will work for all applications, a good first guess might be to + use 0xb0000000 minus @ref netio_input_config_t::total_buffer_size. + If that fails, it might be helpful to consult the running application's + virtual address description file (/proc/pid/maps) to see + which regions of virtual address space are available. + */ +#define NETIO_FIXED_BUFFER_VA 0x00000400 + +/** This registration call will not complete unless the network link + is up. The process will wait several seconds for this to happen (the + precise interval is link-dependent), but if the link does not come up, + ::NETIO_LINK_DOWN will be returned. This flag is the default if + ::NETIO_NOREQUIRE_LINK_UP is not specified. 
Note that this flag by + itself does not request that the link be brought up; that can be done + with the ::NETIO_AUTO_LINK_UPDN or ::NETIO_AUTO_LINK_UP flags (the + latter is the default if no NETIO_AUTO_LINK_xxx flags are specified), + or by explicitly setting the link's desired state via netio_set(). + If the link is not brought up by one of those methods, and this flag + is specified, the registration operation will return ::NETIO_LINK_DOWN. + This flag is ignored if it is specified along with ::NETIO_NO_XMIT and + ::NETIO_NO_RECV. See @ref link for more information on link + management. + */ +#define NETIO_REQUIRE_LINK_UP 0x00000800 + +/** This registration call will complete even if the network link is not up. + Whenever the link is not up, packets will not be sent or received: + netio_get_packet() will return ::NETIO_NOPKT once all queued packets + have been drained, and netio_send_packet() and similar routines will + return NETIO_QUEUE_FULL once the outgoing packet queue in the EPP + or the I/O shim is full. See @ref link for more information on link + management. + */ +#define NETIO_NOREQUIRE_LINK_UP 0x00001000 + +#ifndef __DOXYGEN__ +/* + * These are part of the implementation of the NETIO_AUTO_LINK_xxx flags, + * but should not be used directly by applications, and are thus not + * documented. + */ +#define _NETIO_AUTO_UP 0x00002000 +#define _NETIO_AUTO_DN 0x00004000 +#define _NETIO_AUTO_PRESENT 0x00008000 +#endif + +/** Set the desired state of the link to up, allowing any speeds which are + supported by the link hardware, as part of this registration operation. + Do not take down the link automatically. This is the default if + no other NETIO_AUTO_LINK_xxx flags are specified. This flag is ignored + if it is specified along with ::NETIO_NO_XMIT and ::NETIO_NO_RECV. + See @ref link for more information on link management. + */ +#define NETIO_AUTO_LINK_UP (_NETIO_AUTO_PRESENT | _NETIO_AUTO_UP) + +/** Set the desired state of the link to up, allowing any speeds which are + supported by the link hardware, as part of this registration operation. + Set the desired state of the link to down the next time no tiles are + registered for packet reception or transmission. This flag is ignored + if it is specified along with ::NETIO_NO_XMIT and ::NETIO_NO_RECV. + See @ref link for more information on link management. + */ +#define NETIO_AUTO_LINK_UPDN (_NETIO_AUTO_PRESENT | _NETIO_AUTO_UP | \ + _NETIO_AUTO_DN) + +/** Set the desired state of the link to down the next time no tiles are + registered for packet reception or transmission. This flag is ignored + if it is specified along with ::NETIO_NO_XMIT and ::NETIO_NO_RECV. + See @ref link for more information on link management. + */ +#define NETIO_AUTO_LINK_DN (_NETIO_AUTO_PRESENT | _NETIO_AUTO_DN) + +/** Do not bring up the link automatically as part of this registration + operation. Do not take down the link automatically. This flag + is ignored if it is specified along with ::NETIO_NO_XMIT and + ::NETIO_NO_RECV. See @ref link for more information on link management. + */ +#define NETIO_AUTO_LINK_NONE _NETIO_AUTO_PRESENT + + +/** Minimum number of receive packets. */ +#define NETIO_MIN_RECEIVE_PKTS 16 + +/** Lower bound on the maximum number of receive packets; may be higher + than this on some interfaces. */ +#define NETIO_MAX_RECEIVE_PKTS 128 + +/** Maximum number of send buffers, per packet size. */ +#define NETIO_MAX_SEND_BUFFERS 16 + +/** Number of EPP queue slots, and thus outstanding sends, per EPP. 
*/ +#define NETIO_TOTAL_SENDS_OUTSTANDING 2015 + +/** Minimum number of EPP queue slots, and thus outstanding sends, per + * transmitting tile. */ +#define NETIO_MIN_SENDS_OUTSTANDING 16 + + +/**@}*/ + +#ifndef __DOXYGEN__ + +/** + * An object for providing Ethernet packets to a process. + */ +struct __netio_queue_impl_t; + +/** + * An object for managing the user end of a NetIO queue. + */ +struct __netio_queue_user_impl_t; + +#endif /* !__DOXYGEN__ */ + + +/** A netio_queue_t describes a NetIO communications endpoint. + * @ingroup setup + */ +typedef struct +{ +#ifdef __DOXYGEN__ + uint8_t opaque[8]; /**< This is an opaque structure. */ +#else + struct __netio_queue_impl_t* __system_part; /**< The system part. */ + struct __netio_queue_user_impl_t* __user_part; /**< The user part. */ +#ifdef _NETIO_PTHREAD + _netio_percpu_mutex_t lock; /**< Queue lock. */ +#endif +#endif +} +netio_queue_t; + + +/** + * @brief Packet send context. + * + * @ingroup egress + * + * Packet send context for use with netio_send_packet_prepare and _commit. + */ +typedef struct +{ +#ifdef __DOXYGEN__ + uint8_t opaque[44]; /**< This is an opaque structure. */ +#else + uint8_t flags; /**< Defined below */ + uint8_t datalen; /**< Number of valid words pointed to by data. */ + uint32_t request[9]; /**< Request to be sent to the EPP or shim. Note + that this is smaller than the 11-word maximum + request size, since some constant values are + not saved in the context. */ + uint32_t *data; /**< Data to be sent to the EPP or shim via IDN. */ +#endif +} +netio_send_pkt_context_t; + + +#ifndef __DOXYGEN__ +#define SEND_PKT_CTX_USE_EPP 1 /**< We're sending to an EPP. */ +#define SEND_PKT_CTX_SEND_CSUM 2 /**< Request includes a checksum. */ +#endif + +/** + * @brief Packet vector entry. + * + * @ingroup egress + * + * This data structure is used with netio_send_packet_vector() to send multiple + * packets with one NetIO call. The structure should be initialized by + * calling netio_pkt_vector_set(), rather than by setting the fields + * directly. + * + * This structure is guaranteed to be a power of two in size, no + * bigger than one L2 cache line, and to be aligned modulo its size. + */ +typedef struct +#ifndef __DOXYGEN__ +__attribute__((aligned(8))) +#endif +{ + /** Reserved for use by the user application. When initialized with + * the netio_set_pkt_vector_entry() function, this field is guaranteed + * to be visible to readers only after all other fields are already + * visible. This way it can be used as a valid flag or generation + * counter. */ + uint8_t user_data; + + /* Structure members below this point should not be accessed directly by + * applications, as they may change in the future. */ + + /** Low 8 bits of the packet address to send. The high bits are + * acquired from the 'handle' field. */ + uint8_t buffer_address_low; + + /** Number of bytes to transmit. */ + uint16_t size; + + /** The raw handle from a netio_pkt_t. If this is NETIO_PKT_HANDLE_NONE, + * this vector entry will be skipped and no packet will be transmitted. */ + netio_pkt_handle_t handle; +} +netio_pkt_vector_entry_t; + + +/** + * @brief Initialize fields in a packet vector entry. + * + * @ingroup egress + * + * @param[out] v Pointer to the vector entry to be initialized. + * @param[in] pkt Packet to be transmitted when the vector entry is passed to + * netio_send_packet_vector(). 
Note that the packet's attributes + * (e.g., its L2 offset and length) are captured at the time this + * routine is called; subsequent changes in those attributes will not + * be reflected in the packet which is actually transmitted. + * Changes in the packet's contents, however, will be so reflected. + * If this is NULL, no packet will be transmitted. + * @param[in] user_data User data to be set in the vector entry. + * This function guarantees that the "user_data" field will become + * visible to a reader only after all other fields have become visible. + * This allows a structure in a ring buffer to be written and read + * by a polling reader without any locks or other synchronization. + */ +static __inline void +netio_pkt_vector_set(volatile netio_pkt_vector_entry_t* v, netio_pkt_t* pkt, + uint8_t user_data) +{ + if (pkt) + { + if (NETIO_PKT_IS_MINIMAL(pkt)) + { + netio_pkt_minimal_metadata_t* mmd = + (netio_pkt_minimal_metadata_t*) &pkt->__metadata; + v->buffer_address_low = (uintptr_t) NETIO_PKT_L2_DATA_MM(mmd, pkt) & 0xFF; + v->size = NETIO_PKT_L2_LENGTH_MM(mmd, pkt); + } + else + { + netio_pkt_metadata_t* mda = &pkt->__metadata; + v->buffer_address_low = (uintptr_t) NETIO_PKT_L2_DATA_M(mda, pkt) & 0xFF; + v->size = NETIO_PKT_L2_LENGTH_M(mda, pkt); + } + v->handle.word = pkt->__packet.word; + } + else + { + v->handle.word = 0; /* Set handle to NETIO_PKT_HANDLE_NONE. */ + } + + __asm__("" : : : "memory"); + + v->user_data = user_data; +} + + +/** + * Flags and structures for @ref netio_get() and @ref netio_set(). + * @ingroup config + */ + +/** @{ */ +/** Parameter class; addr is a NETIO_PARAM_xxx value. */ +#define NETIO_PARAM 0 +/** Interface MAC address. This address is only valid with @ref netio_get(). + * The value is a 6-byte MAC address. Depending upon the overall system + * design, a MAC address may or may not be available for each interface. */ +#define NETIO_PARAM_MAC 0 + +/** Determine whether to suspend output on the receipt of pause frames. + * If the value is nonzero, the I/O shim will suspend output when a pause + * frame is received. If the value is zero, pause frames will be ignored. */ +#define NETIO_PARAM_PAUSE_IN 1 + +/** Determine whether to send pause frames if the I/O shim packet FIFOs are + * nearly full. If the value is zero, pause frames are not sent. If + * the value is nonzero, it is the delay value which will be sent in any + * pause frames which are output, in units of 512 bit times. */ +#define NETIO_PARAM_PAUSE_OUT 2 + +/** Jumbo frame support. The value is a 4-byte integer. If the value is + * nonzero, the MAC will accept frames of up to 10240 bytes. If the value + * is zero, the MAC will only accept frames of up to 1544 bytes. */ +#define NETIO_PARAM_JUMBO 3 + +/** I/O shim's overflow statistics register. The value is two 16-bit integers. + * The first 16-bit value (or the low 16 bits, if the value is treated as a + * 32-bit number) is the count of packets which were completely dropped and + * not delivered by the shim. The second 16-bit value (or the high 16 bits, + * if the value is treated as a 32-bit number) is the count of packets + * which were truncated and thus only partially delivered by the shim. This + * register is automatically reset to zero after it has been read. + */ +#define NETIO_PARAM_OVERFLOW 4 + +/** IPP statistics. This address is only valid with @ref netio_get(). The + * value is a netio_stat_t structure. 
Unlike the I/O shim statistics, the
+ * IPP statistics are not all reset to zero on read; see the description
+ * of the netio_stat_t for details. */
+#define NETIO_PARAM_STAT 5
+
+/** Possible link state.  The value is a combination of "NETIO_LINK_xxx"
+ * flags.  With @ref netio_get(), this will indicate which flags are
+ * actually supported by the hardware.
+ *
+ * For historical reasons, specifying this value to netio_set() will have
+ * the same behavior as using ::NETIO_PARAM_LINK_CONFIG, but this usage is
+ * discouraged.
+ */
+#define NETIO_PARAM_LINK_POSSIBLE_STATE 6
+
+/** Link configuration. The value is a combination of "NETIO_LINK_xxx" flags.
+ * With @ref netio_set(), this will attempt to immediately bring up the
+ * link using whichever of the requested flags are supported by the
+ * hardware, or take down the link if the flags are zero; if this is
+ * not possible, an error will be returned.  Many programs will want
+ * to use ::NETIO_PARAM_LINK_DESIRED_STATE instead.
+ *
+ * For historical reasons, specifying this value to netio_get() will
+ * have the same behavior as using ::NETIO_PARAM_LINK_POSSIBLE_STATE,
+ * but this usage is discouraged.
+ */
+#define NETIO_PARAM_LINK_CONFIG NETIO_PARAM_LINK_POSSIBLE_STATE
+
+/** Current link state. This address is only valid with @ref netio_get().
+ * The value is zero or more of the "NETIO_LINK_xxx" flags, ORed together.
+ * If the link is down, the value ANDed with NETIO_LINK_SPEED will be
+ * zero; if the link is up, the value ANDed with NETIO_LINK_SPEED will
+ * result in exactly one of the NETIO_LINK_xxx values, indicating the
+ * current speed. */
+#define NETIO_PARAM_LINK_CURRENT_STATE 7
+
+/** Variant symbol for current state, retained for compatibility with
+ * pre-MDE-2.1 programs. */
+#define NETIO_PARAM_LINK_STATUS NETIO_PARAM_LINK_CURRENT_STATE
+
+/** Packet Coherence protocol. This address is only valid with @ref netio_get().
+ * The value is nonzero if the interface is configured for cache-coherent DMA.
+ */
+#define NETIO_PARAM_COHERENT 8
+
+/** Desired link state. The value is a combination of "NETIO_LINK_xxx"
+ * flags, which specify the desired state for the link.  With @ref
+ * netio_set(), this will, in the background, attempt to bring up the link
+ * using whichever of the requested flags are reasonable, or take down the
+ * link if the flags are zero.  The actual link up or down operation may
+ * happen after this call completes.  If the link state changes in the
+ * future, the system will continue to try to get back to the desired link
+ * state; for instance, if the link is brought up successfully, and then
+ * the network cable is disconnected, the link will go down.  However, the
+ * desired state of the link is still up, so if the cable is reconnected,
+ * the link will be brought up again.
+ *
+ * With @ref netio_get(), this will indicate the desired state for the
+ * link, as set with a previous netio_set() call, or implicitly by a
+ * netio_input_register() or netio_input_unregister() operation.  This may
+ * not reflect the current state of the link; to get that, use
+ * ::NETIO_PARAM_LINK_CURRENT_STATE. */
+#define NETIO_PARAM_LINK_DESIRED_STATE 9
+
+/** NetIO statistics structure.  Retrieved using the ::NETIO_PARAM_STAT
+ * address passed to @ref netio_get(). */
+typedef struct
+{
+  /** Number of packets which have been received by the IPP and forwarded
+   * to a tile's receive queue for processing.  This value wraps at its
+   * maximum, and is not cleared upon read.
*/ + uint32_t packets_received; + + /** Number of packets which have been dropped by the IPP, because they could + * not be received, or could not be forwarded to a tile. The former happens + * when the IPP does not have a free packet buffer of suitable size for an + * incoming frame. The latter happens when all potential destination tiles + * for a packet, as defined by the group, bucket, and queue configuration, + * have full receive queues. This value wraps at its maximum, and is not + * cleared upon read. */ + uint32_t packets_dropped; + + /* + * Note: the #defines after each of the following four one-byte values + * denote their location within the third word of the netio_stat_t. They + * are intended for use only by the IPP implementation and are thus omitted + * from the Doxygen output. + */ + + /** Number of packets dropped because no worker was able to accept a new + * packet. This value saturates at its maximum, and is cleared upon + * read. */ + uint8_t drops_no_worker; +#ifndef __DOXYGEN__ +#define NETIO_STAT_DROPS_NO_WORKER 0 +#endif + + /** Number of packets dropped because no small buffers were available. + * This value saturates at its maximum, and is cleared upon read. */ + uint8_t drops_no_smallbuf; +#ifndef __DOXYGEN__ +#define NETIO_STAT_DROPS_NO_SMALLBUF 1 +#endif + + /** Number of packets dropped because no large buffers were available. + * This value saturates at its maximum, and is cleared upon read. */ + uint8_t drops_no_largebuf; +#ifndef __DOXYGEN__ +#define NETIO_STAT_DROPS_NO_LARGEBUF 2 +#endif + + /** Number of packets dropped because no jumbo buffers were available. + * This value saturates at its maximum, and is cleared upon read. */ + uint8_t drops_no_jumbobuf; +#ifndef __DOXYGEN__ +#define NETIO_STAT_DROPS_NO_JUMBOBUF 3 +#endif +} +netio_stat_t; + + +/** Link can run, should run, or is running at 10 Mbps. */ +#define NETIO_LINK_10M 0x01 + +/** Link can run, should run, or is running at 100 Mbps. */ +#define NETIO_LINK_100M 0x02 + +/** Link can run, should run, or is running at 1 Gbps. */ +#define NETIO_LINK_1G 0x04 + +/** Link can run, should run, or is running at 10 Gbps. */ +#define NETIO_LINK_10G 0x08 + +/** Link should run at the highest speed supported by the link and by + * the device connected to the link. Only usable as a value for + * the link's desired state; never returned as a value for the current + * or possible states. */ +#define NETIO_LINK_ANYSPEED 0x10 + +/** All legal link speeds. */ +#define NETIO_LINK_SPEED (NETIO_LINK_10M | \ + NETIO_LINK_100M | \ + NETIO_LINK_1G | \ + NETIO_LINK_10G | \ + NETIO_LINK_ANYSPEED) + + +/** MAC register class. Addr is a register offset within the MAC. + * Registers within the XGbE and GbE MACs are documented in the Tile + * Processor I/O Device Guide (UG104). MAC registers start at address + * 0x4000, and do not include the MAC_INTERFACE registers. */ +#define NETIO_MAC 1 + +/** MDIO register class (IEEE 802.3 clause 22 format). Addr is the "addr" + * member of a netio_mdio_addr_t structure. */ +#define NETIO_MDIO 2 + +/** MDIO register class (IEEE 802.3 clause 45 format). Addr is the "addr" + * member of a netio_mdio_addr_t structure. */ +#define NETIO_MDIO_CLAUSE45 3 + +/** NetIO MDIO address type. Retrieved or provided using the ::NETIO_MDIO + * address passed to @ref netio_get() or @ref netio_set(). */ +typedef union +{ + struct + { + unsigned int reg:16; /**< MDIO register offset. For clause 22 access, + must be less than 32. */ + unsigned int phy:5; /**< Which MDIO PHY to access. 
*/
+    unsigned int dev:5;   /**< Which MDIO device to access within that
+                               PHY.  Applicable for clause 45 access only;
+                               ignored for clause 22 access. */
+  }
+  bits;                   /**< Container for bitfields. */
+  uint64_t addr;          /**< Value to pass to @ref netio_get() or
+                           *   @ref netio_set(). */
+}
+netio_mdio_addr_t;
+
+/** @} */
+
+#endif /* __NETIO_INTF_H__ */
diff --git a/arch/tile/mm/init.c b/arch/tile/mm/init.c
index 78e1982cb6c9..0b9ce69b0ee5 100644
--- a/arch/tile/mm/init.c
+++ b/arch/tile/mm/init.c
@@ -988,8 +988,12 @@ static long __write_once initfree = 1;
 /* Select whether to free (1) or mark unusable (0) the __init pages. */
 static int __init set_initfree(char *str)
 {
-	strict_strtol(str, 0, &initfree);
-	pr_info("initfree: %s free init pages\n", initfree ? "will" : "won't");
+	long val;
+	if (strict_strtol(str, 0, &val) == 0) {
+		initfree = val;
+		pr_info("initfree: %s free init pages\n",
+			initfree ? "will" : "won't");
+	}
 	return 1;
 }
 __setup("initfree=", set_initfree);
diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
index f6668cdaac85..43db398437b7 100644
--- a/drivers/net/Kconfig
+++ b/drivers/net/Kconfig
@@ -2945,6 +2945,18 @@ source "drivers/s390/net/Kconfig"
 
 source "drivers/net/caif/Kconfig"
 
+config TILE_NET
+	tristate "Tilera GBE/XGBE network driver support"
+	depends on TILE
+	default y
+	select CRC32
+	help
+	  This is a standard Linux network device driver for the
+	  on-chip Tilera Gigabit Ethernet and XAUI interfaces.
+
+	  To compile this driver as a module, choose M here: the module
+	  will be called tile_net.
+
 config XEN_NETDEV_FRONTEND
 	tristate "Xen network device frontend driver"
 	depends on XEN
diff --git a/drivers/net/Makefile b/drivers/net/Makefile
index 652fc6b98039..b90738d13994 100644
--- a/drivers/net/Makefile
+++ b/drivers/net/Makefile
@@ -301,3 +301,4 @@ obj-$(CONFIG_CAIF) += caif/
 
 obj-$(CONFIG_OCTEON_MGMT_ETHERNET) += octeon/
 obj-$(CONFIG_PCH_GBE) += pch_gbe/
+obj-$(CONFIG_TILE_NET) += tile/
diff --git a/drivers/net/tile/Makefile b/drivers/net/tile/Makefile
new file mode 100644
index 000000000000..f634f142cab4
--- /dev/null
+++ b/drivers/net/tile/Makefile
@@ -0,0 +1,10 @@
+#
+# Makefile for the TILE on-chip networking support.
+#
+
+obj-$(CONFIG_TILE_NET) += tile_net.o
+ifdef CONFIG_TILEGX
+tile_net-objs := tilegx.o mpipe.o iorpc_mpipe.o dma_queue.o
+else
+tile_net-objs := tilepro.o
+endif
diff --git a/drivers/net/tile/tilepro.c b/drivers/net/tile/tilepro.c
new file mode 100644
index 000000000000..0e6bac5ec65b
--- /dev/null
+++ b/drivers/net/tile/tilepro.c
@@ -0,0 +1,2406 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/moduleparam.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>      /* printk() */
+#include <linux/slab.h>        /* kmalloc() */
+#include <linux/errno.h>       /* error codes */
+#include <linux/types.h>       /* size_t */
+#include <linux/interrupt.h>
+#include <linux/in.h>
+#include <linux/netdevice.h>   /* struct device, and other headers */
+#include <linux/etherdevice.h> /* eth_type_trans */
+#include <linux/skbuff.h>
+#include <linux/ioctl.h>
+#include <linux/cdev.h>
+#include <linux/hugetlb.h>
+#include <linux/in6.h>
+#include <linux/timer.h>
+#include <linux/io.h>
+#include <asm/checksum.h>
+#include <asm/homecache.h>
+
+#include <hv/drv_xgbe_intf.h>
+#include <hv/drv_xgbe_impl.h>
+#include <hv/hypervisor.h>
+#include <hv/netio_intf.h>
+
+/* For TSO */
+#include <linux/ip.h>
+#include <linux/tcp.h>
+
+
+/* There is no singlethread_cpu, so schedule work on the current cpu. */
+#define singlethread_cpu -1
+
+
+/*
+ * First, "tile_net_init_module()" initializes all four "devices" which
+ * can be used by linux.
+ *
+ * Then, "ifconfig DEVICE up" calls "tile_net_open()", which analyzes
+ * the network cpus, then uses "tile_net_open_aux()" to initialize
+ * LIPP/LEPP, and then uses "tile_net_open_inner()" to register all
+ * the tiles, provide buffers to LIPP, allow ingress to start, and
+ * turn on hypervisor interrupt handling (and NAPI) on all tiles.
+ *
+ * If registration fails due to the link being down, then "retry_work"
+ * is used to keep calling "tile_net_open_inner()" until it succeeds.
+ *
+ * If "ifconfig DEVICE down" is called, it uses "tile_net_stop()" to
+ * stop egress, drain the LIPP buffers, unregister all the tiles, stop
+ * LIPP/LEPP, and wipe the LEPP queue.
+ *
+ * We start out with the ingress interrupt enabled on each CPU.  When
+ * this interrupt fires, we disable it, and call "napi_schedule()".
+ * This will cause "tile_net_poll()" to be called, which will pull
+ * packets from the netio queue, filtering them out, or passing them
+ * to "netif_receive_skb()".  If our budget is exhausted, we will
+ * return, knowing we will be called again later.  Otherwise, we
+ * reenable the ingress interrupt, and call "napi_complete()".
+ *
+ *
+ * NOTE: The use of "native_driver" ensures that EPP exists, and that
+ * "epp_sendv" is legal, and that "LIPP" is being used.
+ *
+ * NOTE: Failing to free completions for an arbitrarily long time
+ * (which is defined to be illegal) does in fact cause bizarre
+ * problems.  The "egress_timer" helps prevent this from happening.
+ *
+ * NOTE: The egress code can be interrupted by the interrupt handler.
+ */
+
+
+/* HACK: Allow use of "jumbo" packets. */
+/* This should be 1500 if "jumbo" is not set in LIPP. */
+/* This should be at most 10226 (10240 - 14) if "jumbo" is set in LIPP. */
+/* ISSUE: This has not been thoroughly tested (except at 1500). */
+#define TILE_NET_MTU 1500
+
+/* HACK: Define to support GSO. */
+/* ISSUE: This may actually hurt performance of the TCP blaster. */
+/* #define TILE_NET_GSO */
+
+/* Define this to collapse "duplicate" acks. */
+/* #define IGNORE_DUP_ACKS */
+
+/* HACK: Define this to verify incoming packets. */
+/* #define TILE_NET_VERIFY_INGRESS */
+
+/* Use 3000 to enable the Linux Traffic Control (QoS) layer, else 0. */
+#define TILE_NET_TX_QUEUE_LEN 0
+
+/* Define to dump packets (prints out the whole packet on tx and rx). */
+/* #define TILE_NET_DUMP_PACKETS */
+
+/* Define to enable debug spew (all PDEBUG's are enabled). */
+/* #define TILE_NET_DEBUG */
+
+
+/* Define to activate paranoia checks. */
+/* #define TILE_NET_PARANOIA */
+
+/* Default transmit lockup timeout period, in jiffies. */
+#define TILE_NET_TIMEOUT (5 * HZ)
+
+/* Default retry interval for bringing up the NetIO interface, in jiffies. */
+#define TILE_NET_RETRY_INTERVAL (5 * HZ)
+
+/* Number of ports (xgbe0, xgbe1, gbe0, gbe1). */
+#define TILE_NET_DEVS 4
+
+
+
+/* Paranoia. */
+#if NET_IP_ALIGN != LIPP_PACKET_PADDING
+#error "NET_IP_ALIGN must match LIPP_PACKET_PADDING."
+#endif
+
+
+/* Debug print. */
+#ifdef TILE_NET_DEBUG
+#define PDEBUG(fmt, args...) net_printk(fmt, ## args)
+#else
+#define PDEBUG(fmt, args...)
+#endif
+
+
+MODULE_AUTHOR("Tilera");
+MODULE_LICENSE("GPL");
+
+
+#define IS_MULTICAST(mac_addr) \
+	(((u8 *)(mac_addr))[0] & 0x01)
+
+#define IS_BROADCAST(mac_addr) \
+	(((u16 *)(mac_addr))[0] == 0xffff)
+
+
+/*
+ * Queue of incoming packets for a specific cpu and device.
+ *
+ * Includes a pointer to the "system" data, and the actual "user" data.
+ */
+struct tile_netio_queue {
+	netio_queue_impl_t *__system_part;
+	netio_queue_user_impl_t __user_part;
+
+};
+
+
+/*
+ * Statistics counters for a specific cpu and device.
+ */
+struct tile_net_stats_t {
+	u32 rx_packets;
+	u32 rx_bytes;
+	u32 tx_packets;
+	u32 tx_bytes;
+};
+
+
+/*
+ * Info for a specific cpu and device.
+ *
+ * ISSUE: There is a "dev" pointer in "napi" as well.
+ */
+struct tile_net_cpu {
+	/* The NAPI struct. */
+	struct napi_struct napi;
+	/* Packet queue. */
+	struct tile_netio_queue queue;
+	/* Statistics. */
+	struct tile_net_stats_t stats;
+	/* ISSUE: Is this needed? */
+	bool napi_enabled;
+	/* True if this tile has successfully registered with the IPP. */
+	bool registered;
+	/* True if the link was down last time we tried to register. */
+	bool link_down;
+	/* True if "egress_timer" is scheduled. */
+	bool egress_timer_scheduled;
+	/* Number of small sk_buffs which must still be provided. */
+	unsigned int num_needed_small_buffers;
+	/* Number of large sk_buffs which must still be provided. */
+	unsigned int num_needed_large_buffers;
+	/* A timer for handling egress completions. */
+	struct timer_list egress_timer;
+};
+
+
+/*
+ * Info for a specific device.
+ */
+struct tile_net_priv {
+	/* Our network device. */
+	struct net_device *dev;
+	/* The actual egress queue. */
+	lepp_queue_t *epp_queue;
+	/* Protects "epp_queue->cmd_tail" and "epp_queue->comp_tail" */
+	spinlock_t cmd_lock;
+	/* Protects "epp_queue->comp_head". */
+	spinlock_t comp_lock;
+	/* The hypervisor handle for this interface. */
+	int hv_devhdl;
+	/* The intr bit mask that IDs this device. */
+	u32 intr_id;
+	/* True iff "tile_net_open_aux()" has succeeded. */
+	int partly_opened;
+	/* True iff "tile_net_open_inner()" has succeeded. */
+	int fully_opened;
+	/* Effective network cpus. */
+	struct cpumask network_cpus_map;
+	/* Number of network cpus. */
+	int network_cpus_count;
+	/* Credits per network cpu. */
+	int network_cpus_credits;
+	/* Network stats. */
+	struct net_device_stats stats;
+	/* For NetIO bringup retries. */
+	struct delayed_work retry_work;
+	/* Quick access to per cpu data. */
+	struct tile_net_cpu *cpu[NR_CPUS];
+};
+
+
+/*
+ * The actual devices (xgbe0, xgbe1, gbe0, gbe1).
+ */
+static struct net_device *tile_net_devs[TILE_NET_DEVS];
+
+/*
+ * The "tile_net_cpu" structures for each device.
+ */
+static DEFINE_PER_CPU(struct tile_net_cpu, hv_xgbe0);
+static DEFINE_PER_CPU(struct tile_net_cpu, hv_xgbe1);
+static DEFINE_PER_CPU(struct tile_net_cpu, hv_gbe0);
+static DEFINE_PER_CPU(struct tile_net_cpu, hv_gbe1);
+
+
+/*
+ * True if "network_cpus" was specified.
+ */
+static bool network_cpus_used;
+
+/*
+ * The actual cpus in "network_cpus".
+ */
+static struct cpumask network_cpus_map;
+
+
+
+#ifdef TILE_NET_DEBUG
+/*
+ * printk with extra stuff.
+ *
+ * We print the CPU we're running in brackets.
+ */
+static void net_printk(char *fmt, ...)
+{
+	int i;
+	int len;
+	va_list args;
+	static char buf[256];
+
+	len = sprintf(buf, "tile_net[%2.2d]: ", smp_processor_id());
+	va_start(args, fmt);
+	i = vscnprintf(buf + len, sizeof(buf) - len - 1, fmt, args);
+	va_end(args);
+	buf[255] = '\0';
+	pr_notice(buf);
+}
+#endif
+
+
+#ifdef TILE_NET_DUMP_PACKETS
+/*
+ * Dump a packet.
+ */
+static void dump_packet(unsigned char *data, unsigned long length, char *s)
+{
+	unsigned long i;
+	static unsigned int count;
+	/* Buffer for one formatted line of hex output. */
+	char buf[128];
+
+	pr_info("dump_packet(data %p, length 0x%lx s %s count 0x%x)\n",
+	       data, length, s, count++);
+
+	pr_info("\n");
+
+	for (i = 0; i < length; i++) {
+		if ((i & 0xf) == 0)
+			sprintf(buf, "%8.8lx:", i);
+		sprintf(buf + strlen(buf), " %2.2x", data[i]);
+		if ((i & 0xf) == 0xf || i == length - 1)
+			pr_info("%s\n", buf);
+	}
+}
+#endif
+
+
+/*
+ * Provide support for the __netio_fastio1() swint
+ * (see <hv/drv_xgbe_intf.h> for how it is used).
+ *
+ * The fastio swint2 call may clobber all the caller-saved registers.
+ * It rarely clobbers memory, but we allow for the possibility in
+ * the signature just to be on the safe side.
+ *
+ * Also, gcc doesn't seem to allow an input operand to be
+ * clobbered, so we fake it with dummy outputs.
+ *
+ * This function can't be static because of the way it is declared
+ * in the netio header.
+ */
+inline int __netio_fastio1(u32 fastio_index, u32 arg0)
+{
+	long result, clobber_r1, clobber_r10;
+	asm volatile("swint2"
+		     : "=R00" (result),
+		       "=R01" (clobber_r1), "=R10" (clobber_r10)
+		     : "R10" (fastio_index), "R01" (arg0)
+		     : "memory", "r2", "r3", "r4",
+		       "r5", "r6", "r7", "r8", "r9",
+		       "r11", "r12", "r13", "r14",
+		       "r15", "r16", "r17", "r18", "r19",
+		       "r20", "r21", "r22", "r23", "r24",
+		       "r25", "r26", "r27", "r28", "r29");
+	return result;
+}
+
+
+/*
+ * Provide a linux buffer to LIPP.
+ */
+static void tile_net_provide_linux_buffer(struct tile_net_cpu *info,
+					  void *va, bool small)
+{
+	struct tile_netio_queue *queue = &info->queue;
+
+	/* Convert "va" and "small" to "linux_buffer_t". */
+	unsigned int buffer = ((unsigned int)(__pa(va) >> 7) << 1) + small;
+
+	__netio_fastio_free_buffer(queue->__user_part.__fastio_index, buffer);
+}
+
+
+/*
+ * Provide a linux buffer for LIPP.
+ */
+static bool tile_net_provide_needed_buffer(struct tile_net_cpu *info,
+					   bool small)
+{
+	/* ISSUE: What should we use here? */
+	unsigned int large_size = NET_IP_ALIGN + TILE_NET_MTU + 100;
+
+	/* Round up to avoid "false sharing" with the last cache line. */
+	unsigned int buffer_size =
+		(((small ? LIPP_SMALL_PACKET_SIZE : large_size) +
+		  CHIP_L2_LINE_SIZE() - 1) & -CHIP_L2_LINE_SIZE());
+
+	/*
+	 * ISSUE: Since CPAs are 38 bits, and we can only encode the
+	 * high 31 bits in a "linux_buffer_t", the low 7 bits must be
+	 * zero, and thus, we must align the actual "va" mod 128.
+	 */
+	const unsigned long align = 128;
+
+	struct sk_buff *skb;
+	void *va;
+
+	struct sk_buff **skb_ptr;
+
+	/* Note that "dev_alloc_skb()" adds NET_SKB_PAD more bytes, */
+	/* and also "reserves" that many bytes. */
+	/* ISSUE: Can we "share" the NET_SKB_PAD bytes with "skb_ptr"? */
+	int len = sizeof(*skb_ptr) + align + buffer_size;
+
+	while (1) {
+
+		/* Allocate (or fail). */
+		skb = dev_alloc_skb(len);
+		if (skb == NULL)
+			return false;
+
+		/* Make room for a back-pointer to 'skb'. */
+		skb_reserve(skb, sizeof(*skb_ptr));
+
+		/* Make sure we are aligned. */
+		skb_reserve(skb, -(long)skb->data & (align - 1));
+
+		/* This address is given to IPP. */
+		va = skb->data;
+
+		if (small)
+			break;
+
+		/* ISSUE: This has never been observed!
*/ + /* Large buffers must not span a huge page. */ + if (((((long)va & ~HPAGE_MASK) + 1535) & HPAGE_MASK) == 0) + break; + pr_err("Leaking unaligned linux buffer at %p.\n", va); + } + + /* Skip two bytes to satisfy LIPP assumptions. */ + /* Note that this aligns IP on a 16 byte boundary. */ + /* ISSUE: Do this when the packet arrives? */ + skb_reserve(skb, NET_IP_ALIGN); + + /* Save a back-pointer to 'skb'. */ + skb_ptr = va - sizeof(*skb_ptr); + *skb_ptr = skb; + + /* Invalidate the packet buffer. */ + if (!hash_default) + __inv_buffer(skb->data, buffer_size); + + /* Make sure "skb_ptr" has been flushed. */ + __insn_mf(); + +#ifdef TILE_NET_PARANOIA +#if CHIP_HAS_CBOX_HOME_MAP() + if (hash_default) { + HV_PTE pte = *virt_to_pte(current->mm, (unsigned long)va); + if (hv_pte_get_mode(pte) != HV_PTE_MODE_CACHE_HASH_L3) + panic("Non-coherent ingress buffer!"); + } +#endif +#endif + + /* Provide the new buffer. */ + tile_net_provide_linux_buffer(info, va, small); + + return true; +} + + +/* + * Provide linux buffers for LIPP. + */ +static void tile_net_provide_needed_buffers(struct tile_net_cpu *info) +{ + while (info->num_needed_small_buffers != 0) { + if (!tile_net_provide_needed_buffer(info, true)) + goto oops; + info->num_needed_small_buffers--; + } + + while (info->num_needed_large_buffers != 0) { + if (!tile_net_provide_needed_buffer(info, false)) + goto oops; + info->num_needed_large_buffers--; + } + + return; + +oops: + + /* Add a description to the page allocation failure dump. */ + pr_notice("Could not provide a linux buffer to LIPP.\n"); +} + + +/* + * Grab some LEPP completions, and store them in "comps", of size + * "comps_size", and return the number of completions which were + * stored, so the caller can free them. + * + * If "pending" is not NULL, it will be set to true if there might + * still be some pending completions caused by this tile, else false. + */ +static unsigned int tile_net_lepp_grab_comps(struct net_device *dev, + struct sk_buff *comps[], + unsigned int comps_size, + bool *pending) +{ + struct tile_net_priv *priv = netdev_priv(dev); + + lepp_queue_t *eq = priv->epp_queue; + + unsigned int n = 0; + + unsigned int comp_head; + unsigned int comp_busy; + unsigned int comp_tail; + + spin_lock(&priv->comp_lock); + + comp_head = eq->comp_head; + comp_busy = eq->comp_busy; + comp_tail = eq->comp_tail; + + while (comp_head != comp_busy && n < comps_size) { + comps[n++] = eq->comps[comp_head]; + LEPP_QINC(comp_head); + } + + if (pending != NULL) + *pending = (comp_head != comp_tail); + + eq->comp_head = comp_head; + + spin_unlock(&priv->comp_lock); + + return n; +} + + +/* + * Make sure the egress timer is scheduled. + * + * Note that we use "schedule if not scheduled" logic instead of the more + * obvious "reschedule" logic, because "reschedule" is fairly expensive. + */ +static void tile_net_schedule_egress_timer(struct tile_net_cpu *info) +{ + if (!info->egress_timer_scheduled) { + mod_timer_pinned(&info->egress_timer, jiffies + 1); + info->egress_timer_scheduled = true; + } +} + + +/* + * The "function" for "info->egress_timer". + * + * This timer will reschedule itself as long as there are any pending + * completions expected (on behalf of any tile). + * + * ISSUE: Realistically, will the timer ever stop scheduling itself? + * + * ISSUE: This timer is almost never actually needed, so just use a global + * timer that can run on any tile. 
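+ *
+ * (As currently written, each cpu has its own "egress_timer" in its
+ * "tile_net_cpu" structure, and "tile_net_schedule_egress_timer()"
+ * pins it to that cpu via "mod_timer_pinned()".)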
+ * + * ISSUE: Maybe instead track number of expected completions, and free + * only that many, resetting to zero if "pending" is ever false. + */ +static void tile_net_handle_egress_timer(unsigned long arg) +{ + struct tile_net_cpu *info = (struct tile_net_cpu *)arg; + struct net_device *dev = info->napi.dev; + + struct sk_buff *olds[32]; + unsigned int wanted = 32; + unsigned int i, nolds = 0; + bool pending; + + /* The timer is no longer scheduled. */ + info->egress_timer_scheduled = false; + + nolds = tile_net_lepp_grab_comps(dev, olds, wanted, &pending); + + for (i = 0; i < nolds; i++) + kfree_skb(olds[i]); + + /* Reschedule timer if needed. */ + if (pending) + tile_net_schedule_egress_timer(info); +} + + +#ifdef IGNORE_DUP_ACKS + +/* + * Help detect "duplicate" ACKs. These are sequential packets (for a + * given flow) which are exactly 66 bytes long, sharing everything but + * ID=2@0x12, Hsum=2@0x18, Ack=4@0x2a, WinSize=2@0x30, Csum=2@0x32, + * Tstamps=10@0x38. The ID's are +1, the Hsum's are -1, the Ack's are + * +N, and the Tstamps are usually identical. + * + * NOTE: Apparently truly duplicate acks (with identical "ack" values), + * should not be collapsed, as they are used for some kind of flow control. + */ +static bool is_dup_ack(char *s1, char *s2, unsigned int len) +{ + int i; + + unsigned long long ignorable = 0; + + /* Identification. */ + ignorable |= (1ULL << 0x12); + ignorable |= (1ULL << 0x13); + + /* Header checksum. */ + ignorable |= (1ULL << 0x18); + ignorable |= (1ULL << 0x19); + + /* ACK. */ + ignorable |= (1ULL << 0x2a); + ignorable |= (1ULL << 0x2b); + ignorable |= (1ULL << 0x2c); + ignorable |= (1ULL << 0x2d); + + /* WinSize. */ + ignorable |= (1ULL << 0x30); + ignorable |= (1ULL << 0x31); + + /* Checksum. */ + ignorable |= (1ULL << 0x32); + ignorable |= (1ULL << 0x33); + + for (i = 0; i < len; i++, ignorable >>= 1) { + + if ((ignorable & 1) || (s1[i] == s2[i])) + continue; + +#ifdef TILE_NET_DEBUG + /* HACK: Mention non-timestamp diffs. */ + if (i < 0x38 && i != 0x2f && + net_ratelimit()) + pr_info("Diff at 0x%x\n", i); +#endif + + return false; + } + +#ifdef TILE_NET_NO_SUPPRESS_DUP_ACKS + /* HACK: Do not suppress truly duplicate ACKs. */ + /* ISSUE: Is this actually necessary or helpful? */ + if (s1[0x2a] == s2[0x2a] && + s1[0x2b] == s2[0x2b] && + s1[0x2c] == s2[0x2c] && + s1[0x2d] == s2[0x2d]) { + return false; + } +#endif + + return true; +} + +#endif + + + +/* + * Like "tile_net_handle_packets()", but just discard packets. + */ +static void tile_net_discard_packets(struct net_device *dev) +{ + struct tile_net_priv *priv = netdev_priv(dev); + int my_cpu = smp_processor_id(); + struct tile_net_cpu *info = priv->cpu[my_cpu]; + struct tile_netio_queue *queue = &info->queue; + netio_queue_impl_t *qsp = queue->__system_part; + netio_queue_user_impl_t *qup = &queue->__user_part; + + while (qup->__packet_receive_read != + qsp->__packet_receive_queue.__packet_write) { + + int index = qup->__packet_receive_read; + + int index2_aux = index + sizeof(netio_pkt_t); + int index2 = + ((index2_aux == + qsp->__packet_receive_queue.__last_packet_plus_one) ? + 0 : index2_aux); + + netio_pkt_t *pkt = (netio_pkt_t *) + ((unsigned long) &qsp[1] + index); + + /* Extract the "linux_buffer_t". */ + unsigned int buffer = pkt->__packet.word; + + /* Convert "linux_buffer_t" to "va". */ + void *va = __va((phys_addr_t)(buffer >> 1) << 7); + + /* Acquire the associated "skb". 
*/
+		struct sk_buff **skb_ptr = va - sizeof(*skb_ptr);
+		struct sk_buff *skb = *skb_ptr;
+
+		kfree_skb(skb);
+
+		/* Consume this packet. */
+		qup->__packet_receive_read = index2;
+	}
+}
+
+
+/*
+ * Handle the next packet.  Return true if "processed", false if "filtered".
+ */
+static bool tile_net_poll_aux(struct tile_net_cpu *info, int index)
+{
+	struct net_device *dev = info->napi.dev;
+
+	struct tile_netio_queue *queue = &info->queue;
+	netio_queue_impl_t *qsp = queue->__system_part;
+	netio_queue_user_impl_t *qup = &queue->__user_part;
+	struct tile_net_stats_t *stats = &info->stats;
+
+	int filter;
+
+	int index2_aux = index + sizeof(netio_pkt_t);
+	int index2 =
+		((index2_aux ==
+		  qsp->__packet_receive_queue.__last_packet_plus_one) ?
+		 0 : index2_aux);
+
+	netio_pkt_t *pkt = (netio_pkt_t *)((unsigned long) &qsp[1] + index);
+
+	netio_pkt_metadata_t *metadata = NETIO_PKT_METADATA(pkt);
+
+	/* Extract the packet size. */
+	unsigned long len =
+		(NETIO_PKT_CUSTOM_LENGTH(pkt) +
+		 NET_IP_ALIGN - NETIO_PACKET_PADDING);
+
+	/* Extract the "linux_buffer_t". */
+	unsigned int buffer = pkt->__packet.word;
+
+	/* Extract "small" (vs "large"). */
+	bool small = ((buffer & 1) != 0);
+
+	/* Convert "linux_buffer_t" to "va". */
+	void *va = __va((phys_addr_t)(buffer >> 1) << 7);
+
+	/* Extract the packet data pointer. */
+	/* Compare to "NETIO_PKT_CUSTOM_DATA(pkt)". */
+	unsigned char *buf = va + NET_IP_ALIGN;
+
+#ifdef IGNORE_DUP_ACKS
+
+	static int other;
+	static int final;
+	static int keep;
+	static int skip;
+
+#endif
+
+	/* Invalidate the packet buffer. */
+	if (!hash_default)
+		__inv_buffer(buf, len);
+
+	/* ISSUE: Is this needed? */
+	dev->last_rx = jiffies;
+
+#ifdef TILE_NET_DUMP_PACKETS
+	dump_packet(buf, len, "rx");
+#endif /* TILE_NET_DUMP_PACKETS */
+
+#ifdef TILE_NET_VERIFY_INGRESS
+	if (!NETIO_PKT_L4_CSUM_CORRECT_M(metadata, pkt) &&
+	    NETIO_PKT_L4_CSUM_CALCULATED_M(metadata, pkt)) {
+		/*
+		 * FIXME: This complains about UDP packets
+		 * with a "zero" checksum (bug 6624).
+		 */
+#ifdef TILE_NET_PANIC_ON_BAD
+		dump_packet(buf, len, "rx");
+		panic("Bad L4 checksum.");
+#else
+		pr_warning("Bad L4 checksum on %lu byte packet.\n", len);
+#endif
+	}
+	if (!NETIO_PKT_L3_CSUM_CORRECT_M(metadata, pkt) &&
+	    NETIO_PKT_L3_CSUM_CALCULATED_M(metadata, pkt)) {
+		dump_packet(buf, len, "rx");
+		panic("Bad L3 checksum.");
+	}
+	switch (NETIO_PKT_STATUS_M(metadata, pkt)) {
+	case NETIO_PKT_STATUS_OVERSIZE:
+		if (len >= 64) {
+			dump_packet(buf, len, "rx");
+			panic("Unexpected OVERSIZE.");
+		}
+		break;
+	case NETIO_PKT_STATUS_BAD:
+#ifdef TILE_NET_PANIC_ON_BAD
+		dump_packet(buf, len, "rx");
+		panic("Unexpected BAD packet.");
+#else
+		pr_warning("Unexpected BAD %lu byte packet.\n", len);
+#endif
+	}
+#endif
+
+	filter = 0;
+
+	if (!(dev->flags & IFF_UP)) {
+		/* Filter packets received before we're up. */
+		filter = 1;
+	} else if (!(dev->flags & IFF_PROMISC)) {
+		/*
+		 * FIXME: Implement HW multicast filter.
+		 */
+		if (!IS_MULTICAST(buf) && !IS_BROADCAST(buf)) {
+			/* Filter packets not for our address. */
+			const u8 *mine = dev->dev_addr;
+			filter = compare_ether_addr(mine, buf);
+		}
+	}
+
+#ifdef IGNORE_DUP_ACKS
+
+	if (len != 66) {
+		/* FIXME: Must check "is_tcp_ack(buf, len)" somehow. */
+
+		other++;
+
+	} else if (index2 ==
+		   qsp->__packet_receive_queue.__packet_write) {
+
+		final++;
+
+	} else {
+
+		netio_pkt_t *pkt2 = (netio_pkt_t *)
+			((unsigned long) &qsp[1] + index2);
+
+		netio_pkt_metadata_t *metadata2 =
+			NETIO_PKT_METADATA(pkt2);
+
+		/* Extract the packet size.
*/ + unsigned long len2 = + (NETIO_PKT_CUSTOM_LENGTH(pkt2) + + NET_IP_ALIGN - NETIO_PACKET_PADDING); + + if (len2 == 66 && + NETIO_PKT_FLOW_HASH_M(metadata, pkt) == + NETIO_PKT_FLOW_HASH_M(metadata2, pkt2)) { + + /* Extract the "linux_buffer_t". */ + unsigned int buffer2 = pkt2->__packet.word; + + /* Convert "linux_buffer_t" to "va". */ + void *va2 = + __va((phys_addr_t)(buffer2 >> 1) << 7); + + /* Extract the packet data pointer. */ + /* Compare to "NETIO_PKT_CUSTOM_DATA(pkt)". */ + unsigned char *buf2 = va2 + NET_IP_ALIGN; + + /* Invalidate the packet buffer. */ + if (!hash_default) + __inv_buffer(buf2, len2); + + if (is_dup_ack(buf, buf2, len)) { + skip++; + filter = 1; + } else { + keep++; + } + } + } + + if (net_ratelimit()) + pr_info("Other %d Final %d Keep %d Skip %d.\n", + other, final, keep, skip); + +#endif + + if (filter) { + + /* ISSUE: Update "drop" statistics? */ + + tile_net_provide_linux_buffer(info, va, small); + + } else { + + /* Acquire the associated "skb". */ + struct sk_buff **skb_ptr = va - sizeof(*skb_ptr); + struct sk_buff *skb = *skb_ptr; + + /* Paranoia. */ + if (skb->data != buf) + panic("Corrupt linux buffer from LIPP! " + "VA=%p, skb=%p, skb->data=%p\n", + va, skb, skb->data); + + /* Encode the actual packet length. */ + skb_put(skb, len); + + /* NOTE: This call also sets "skb->dev = dev". */ + skb->protocol = eth_type_trans(skb, dev); + + /* ISSUE: Discard corrupt packets? */ + /* ISSUE: Discard packets with bad checksums? */ + + /* Avoid recomputing TCP/UDP checksums. */ + if (NETIO_PKT_L4_CSUM_CORRECT_M(metadata, pkt)) + skb->ip_summed = CHECKSUM_UNNECESSARY; + + netif_receive_skb(skb); + + stats->rx_packets++; + stats->rx_bytes += len; + + if (small) + info->num_needed_small_buffers++; + else + info->num_needed_large_buffers++; + } + + /* Return four credits after every fourth packet. */ + if (--qup->__receive_credit_remaining == 0) { + u32 interval = qup->__receive_credit_interval; + qup->__receive_credit_remaining = interval; + __netio_fastio_return_credits(qup->__fastio_index, interval); + } + + /* Consume this packet. */ + qup->__packet_receive_read = index2; + + return !filter; +} + + +/* + * Handle some packets for the given device on the current CPU. + * + * ISSUE: The "rotting packet" race condition occurs if a packet + * arrives after the queue appears to be empty, and before the + * hypervisor interrupt is re-enabled. + */ +static int tile_net_poll(struct napi_struct *napi, int budget) +{ + struct net_device *dev = napi->dev; + struct tile_net_priv *priv = netdev_priv(dev); + int my_cpu = smp_processor_id(); + struct tile_net_cpu *info = priv->cpu[my_cpu]; + struct tile_netio_queue *queue = &info->queue; + netio_queue_impl_t *qsp = queue->__system_part; + netio_queue_user_impl_t *qup = &queue->__user_part; + + unsigned int work = 0; + + while (1) { + int index = qup->__packet_receive_read; + if (index == qsp->__packet_receive_queue.__packet_write) + break; + + if (tile_net_poll_aux(info, index)) { + if (++work >= budget) + goto done; + } + } + + napi_complete(&info->napi); + + /* Re-enable hypervisor interrupts. */ + enable_percpu_irq(priv->intr_id); + + /* HACK: Avoid the "rotting packet" problem. */ + if (qup->__packet_receive_read != + qsp->__packet_receive_queue.__packet_write) + napi_schedule(&info->napi); + + /* ISSUE: Handle completions? */ + +done: + + tile_net_provide_needed_buffers(info); + + return work; +} + + +/* + * Handle an ingress interrupt for the given device on the current cpu. 
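+ *
+ * (The handler just masks the per-cpu hypervisor interrupt and hands
+ * the rest of the work to NAPI; the interrupt is re-enabled in
+ * "tile_net_poll()" once the queue has been drained.)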
+ */ +static irqreturn_t tile_net_handle_ingress_interrupt(int irq, void *dev_ptr) +{ + struct net_device *dev = (struct net_device *)dev_ptr; + struct tile_net_priv *priv = netdev_priv(dev); + int my_cpu = smp_processor_id(); + struct tile_net_cpu *info = priv->cpu[my_cpu]; + + /* Disable hypervisor interrupt. */ + disable_percpu_irq(priv->intr_id); + + napi_schedule(&info->napi); + + return IRQ_HANDLED; +} + + +/* + * One time initialization per interface. + */ +static int tile_net_open_aux(struct net_device *dev) +{ + struct tile_net_priv *priv = netdev_priv(dev); + + int ret; + int dummy; + unsigned int epp_lotar; + + /* + * Find out where EPP memory should be homed. + */ + ret = hv_dev_pread(priv->hv_devhdl, 0, + (HV_VirtAddr)&epp_lotar, sizeof(epp_lotar), + NETIO_EPP_SHM_OFF); + if (ret < 0) { + pr_err("could not read epp_shm_queue lotar.\n"); + return -EIO; + } + + /* + * Home the page on the EPP. + */ + { + int epp_home = hv_lotar_to_cpu(epp_lotar); + struct page *page = virt_to_page(priv->epp_queue); + homecache_change_page_home(page, 0, epp_home); + } + + /* + * Register the EPP shared memory queue. + */ + { + netio_ipp_address_t ea = { + .va = 0, + .pa = __pa(priv->epp_queue), + .pte = hv_pte(0), + .size = PAGE_SIZE, + }; + ea.pte = hv_pte_set_lotar(ea.pte, epp_lotar); + ea.pte = hv_pte_set_mode(ea.pte, HV_PTE_MODE_CACHE_TILE_L3); + ret = hv_dev_pwrite(priv->hv_devhdl, 0, + (HV_VirtAddr)&ea, + sizeof(ea), + NETIO_EPP_SHM_OFF); + if (ret < 0) + return -EIO; + } + + /* + * Start LIPP/LEPP. + */ + if (hv_dev_pwrite(priv->hv_devhdl, 0, (HV_VirtAddr)&dummy, + sizeof(dummy), NETIO_IPP_START_SHIM_OFF) < 0) { + pr_warning("Failed to start LIPP/LEPP.\n"); + return -EIO; + } + + return 0; +} + + +/* + * Register with hypervisor on each CPU. + * + * Strangely, this function does important things even if it "fails", + * which is especially common if the link is not up yet. Hopefully + * these things are all "harmless" if done twice! + */ +static void tile_net_register(void *dev_ptr) +{ + struct net_device *dev = (struct net_device *)dev_ptr; + struct tile_net_priv *priv = netdev_priv(dev); + int my_cpu = smp_processor_id(); + struct tile_net_cpu *info; + + struct tile_netio_queue *queue; + + /* Only network cpus can receive packets. */ + int queue_id = + cpumask_test_cpu(my_cpu, &priv->network_cpus_map) ? 0 : 255; + + netio_input_config_t config = { + .flags = 0, + .num_receive_packets = priv->network_cpus_credits, + .queue_id = queue_id + }; + + int ret = 0; + netio_queue_impl_t *queuep; + + PDEBUG("tile_net_register(queue_id %d)\n", queue_id); + + if (!strcmp(dev->name, "xgbe0")) + info = &__get_cpu_var(hv_xgbe0); + else if (!strcmp(dev->name, "xgbe1")) + info = &__get_cpu_var(hv_xgbe1); + else if (!strcmp(dev->name, "gbe0")) + info = &__get_cpu_var(hv_gbe0); + else if (!strcmp(dev->name, "gbe1")) + info = &__get_cpu_var(hv_gbe1); + else + BUG(); + + /* Initialize the egress timer. */ + init_timer(&info->egress_timer); + info->egress_timer.data = (long)info; + info->egress_timer.function = tile_net_handle_egress_timer; + + priv->cpu[my_cpu] = info; + + /* + * Register ourselves with the IPP. 
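+	 * The config is passed to the IPP with a write to the special
+	 * NETIO_IPP_INPUT_REGISTER_OFF offset, and the address of our
+	 * queue's system part is then read back from the same offset.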
+ */ + ret = hv_dev_pwrite(priv->hv_devhdl, 0, + (HV_VirtAddr)&config, + sizeof(netio_input_config_t), + NETIO_IPP_INPUT_REGISTER_OFF); + PDEBUG("hv_dev_pwrite(NETIO_IPP_INPUT_REGISTER_OFF) returned %d\n", + ret); + if (ret < 0) { + printk(KERN_DEBUG "hv_dev_pwrite NETIO_IPP_INPUT_REGISTER_OFF" + " failure %d\n", ret); + info->link_down = (ret == NETIO_LINK_DOWN); + return; + } + + /* + * Get the pointer to our queue's system part. + */ + + ret = hv_dev_pread(priv->hv_devhdl, 0, + (HV_VirtAddr)&queuep, + sizeof(netio_queue_impl_t *), + NETIO_IPP_INPUT_REGISTER_OFF); + PDEBUG("hv_dev_pread(NETIO_IPP_INPUT_REGISTER_OFF) returned %d\n", + ret); + PDEBUG("queuep %p\n", queuep); + if (ret <= 0) { + /* ISSUE: Shouldn't this be a fatal error? */ + pr_err("hv_dev_pread NETIO_IPP_INPUT_REGISTER_OFF failure\n"); + return; + } + + queue = &info->queue; + + queue->__system_part = queuep; + + memset(&queue->__user_part, 0, sizeof(netio_queue_user_impl_t)); + + /* This is traditionally "config.num_receive_packets / 2". */ + queue->__user_part.__receive_credit_interval = 4; + queue->__user_part.__receive_credit_remaining = + queue->__user_part.__receive_credit_interval; + + /* + * Get a fastio index from the hypervisor. + * ISSUE: Shouldn't this check the result? + */ + ret = hv_dev_pread(priv->hv_devhdl, 0, + (HV_VirtAddr)&queue->__user_part.__fastio_index, + sizeof(queue->__user_part.__fastio_index), + NETIO_IPP_GET_FASTIO_OFF); + PDEBUG("hv_dev_pread(NETIO_IPP_GET_FASTIO_OFF) returned %d\n", ret); + + netif_napi_add(dev, &info->napi, tile_net_poll, 64); + + /* Now we are registered. */ + info->registered = true; +} + + +/* + * Unregister with hypervisor on each CPU. + */ +static void tile_net_unregister(void *dev_ptr) +{ + struct net_device *dev = (struct net_device *)dev_ptr; + struct tile_net_priv *priv = netdev_priv(dev); + int my_cpu = smp_processor_id(); + struct tile_net_cpu *info = priv->cpu[my_cpu]; + + int ret = 0; + int dummy = 0; + + /* Do nothing if never registered. */ + if (info == NULL) + return; + + /* Do nothing if already unregistered. */ + if (!info->registered) + return; + + /* + * Unregister ourselves with LIPP. + */ + ret = hv_dev_pwrite(priv->hv_devhdl, 0, (HV_VirtAddr)&dummy, + sizeof(dummy), NETIO_IPP_INPUT_UNREGISTER_OFF); + PDEBUG("hv_dev_pwrite(NETIO_IPP_INPUT_UNREGISTER_OFF) returned %d\n", + ret); + if (ret < 0) { + /* FIXME: Just panic? */ + pr_err("hv_dev_pwrite NETIO_IPP_INPUT_UNREGISTER_OFF" + " failure %d\n", ret); + } + + /* + * Discard all packets still in our NetIO queue. Hopefully, + * once the unregister call is complete, there will be no + * packets still in flight on the IDN. + */ + tile_net_discard_packets(dev); + + /* Reset state. */ + info->num_needed_small_buffers = 0; + info->num_needed_large_buffers = 0; + + /* Cancel egress timer. */ + del_timer(&info->egress_timer); + info->egress_timer_scheduled = false; + + netif_napi_del(&info->napi); + + /* Now we are unregistered. */ + info->registered = false; +} + + +/* + * Helper function for "tile_net_stop()". + * + * Also used to handle registration failure in "tile_net_open_inner()", + * when "fully_opened" is known to be false, and the various extra + * steps in "tile_net_stop()" are not necessary. ISSUE: It might be + * simpler if we could just call "tile_net_stop()" anyway. + */ +static void tile_net_stop_aux(struct net_device *dev) +{ + struct tile_net_priv *priv = netdev_priv(dev); + + int dummy = 0; + + /* Unregister all tiles, so LIPP will stop delivering packets. 
*/ + on_each_cpu(tile_net_unregister, (void *)dev, 1); + + /* Stop LIPP/LEPP. */ + if (hv_dev_pwrite(priv->hv_devhdl, 0, (HV_VirtAddr)&dummy, + sizeof(dummy), NETIO_IPP_STOP_SHIM_OFF) < 0) + panic("Failed to stop LIPP/LEPP!\n"); + + priv->partly_opened = 0; +} + + +/* + * Disable ingress interrupts for the given device on the current cpu. + */ +static void tile_net_disable_intr(void *dev_ptr) +{ + struct net_device *dev = (struct net_device *)dev_ptr; + struct tile_net_priv *priv = netdev_priv(dev); + int my_cpu = smp_processor_id(); + struct tile_net_cpu *info = priv->cpu[my_cpu]; + + /* Disable hypervisor interrupt. */ + disable_percpu_irq(priv->intr_id); + + /* Disable NAPI if needed. */ + if (info != NULL && info->napi_enabled) { + napi_disable(&info->napi); + info->napi_enabled = false; + } +} + + +/* + * Enable ingress interrupts for the given device on the current cpu. + */ +static void tile_net_enable_intr(void *dev_ptr) +{ + struct net_device *dev = (struct net_device *)dev_ptr; + struct tile_net_priv *priv = netdev_priv(dev); + int my_cpu = smp_processor_id(); + struct tile_net_cpu *info = priv->cpu[my_cpu]; + + /* Enable hypervisor interrupt. */ + enable_percpu_irq(priv->intr_id); + + /* Enable NAPI. */ + napi_enable(&info->napi); + info->napi_enabled = true; +} + + +/* + * tile_net_open_inner does most of the work of bringing up the interface. + * It's called from tile_net_open(), and also from tile_net_retry_open(). + * The return value is 0 if the interface was brought up, < 0 if + * tile_net_open() should return the return value as an error, and > 0 if + * tile_net_open() should return success and schedule a work item to + * periodically retry the bringup. + */ +static int tile_net_open_inner(struct net_device *dev) +{ + struct tile_net_priv *priv = netdev_priv(dev); + int my_cpu = smp_processor_id(); + struct tile_net_cpu *info; + struct tile_netio_queue *queue; + unsigned int irq; + int i; + + /* + * First try to register just on the local CPU, and handle any + * semi-expected "link down" failure specially. Note that we + * do NOT call "tile_net_stop_aux()", unlike below. + */ + tile_net_register(dev); + info = priv->cpu[my_cpu]; + if (!info->registered) { + if (info->link_down) + return 1; + return -EAGAIN; + } + + /* + * Now register everywhere else. If any registration fails, + * even for "link down" (which might not be possible), we + * clean up using "tile_net_stop_aux()". + */ + smp_call_function(tile_net_register, (void *)dev, 1); + for_each_online_cpu(i) { + if (!priv->cpu[i]->registered) { + tile_net_stop_aux(dev); + return -EAGAIN; + } + } + + queue = &info->queue; + + /* + * Set the device intr bit mask. + * The tile_net_register above sets per tile __intr_id. + */ + priv->intr_id = queue->__system_part->__intr_id; + BUG_ON(!priv->intr_id); + + /* + * Register the device interrupt handler. + * The __ffs() function returns the index into the interrupt handler + * table from the interrupt bit mask which should have one bit + * and one bit only set. + */ + irq = __ffs(priv->intr_id); + tile_irq_activate(irq, TILE_IRQ_PERCPU); + BUG_ON(request_irq(irq, tile_net_handle_ingress_interrupt, + 0, dev->name, (void *)dev) != 0); + + /* ISSUE: How could "priv->fully_opened" ever be "true" here? */ + + if (!priv->fully_opened) { + + int dummy = 0; + + /* Allocate initial buffers. 
*/ + + int max_buffers = + priv->network_cpus_count * priv->network_cpus_credits; + + info->num_needed_small_buffers = + min(LIPP_SMALL_BUFFERS, max_buffers); + + info->num_needed_large_buffers = + min(LIPP_LARGE_BUFFERS, max_buffers); + + tile_net_provide_needed_buffers(info); + + if (info->num_needed_small_buffers != 0 || + info->num_needed_large_buffers != 0) + panic("Insufficient memory for buffer stack!"); + + /* Start LIPP/LEPP and activate "ingress" at the shim. */ + if (hv_dev_pwrite(priv->hv_devhdl, 0, (HV_VirtAddr)&dummy, + sizeof(dummy), NETIO_IPP_INPUT_INIT_OFF) < 0) + panic("Failed to activate the LIPP Shim!\n"); + + priv->fully_opened = 1; + } + + /* On each tile, enable the hypervisor to trigger interrupts. */ + /* ISSUE: Do this before starting LIPP/LEPP? */ + on_each_cpu(tile_net_enable_intr, (void *)dev, 1); + + /* Start our transmit queue. */ + netif_start_queue(dev); + + return 0; +} + + +/* + * Called periodically to retry bringing up the NetIO interface, + * if it doesn't come up cleanly during tile_net_open(). + */ +static void tile_net_open_retry(struct work_struct *w) +{ + struct delayed_work *dw = + container_of(w, struct delayed_work, work); + + struct tile_net_priv *priv = + container_of(dw, struct tile_net_priv, retry_work); + + /* + * Try to bring the NetIO interface up. If it fails, reschedule + * ourselves to try again later; otherwise, tell Linux we now have + * a working link. ISSUE: What if the return value is negative? + */ + if (tile_net_open_inner(priv->dev)) + schedule_delayed_work_on(singlethread_cpu, &priv->retry_work, + TILE_NET_RETRY_INTERVAL); + else + netif_carrier_on(priv->dev); +} + + +/* + * Called when a network interface is made active. + * + * Returns 0 on success, negative value on failure. + * + * The open entry point is called when a network interface is made + * active by the system (IFF_UP). At this point all resources needed + * for transmit and receive operations are allocated, the interrupt + * handler is registered with the OS, the watchdog timer is started, + * and the stack is notified that the interface is ready. + * + * If the actual link is not available yet, then we tell Linux that + * we have no carrier, and we keep checking until the link comes up. + */ +static int tile_net_open(struct net_device *dev) +{ + int ret = 0; + struct tile_net_priv *priv = netdev_priv(dev); + + /* + * We rely on priv->partly_opened to tell us if this is the + * first time this interface is being brought up. If it is + * set, the IPP was already initialized and should not be + * initialized again. + */ + if (!priv->partly_opened) { + + int count; + int credits; + + /* Initialize LIPP/LEPP, and start the Shim. */ + ret = tile_net_open_aux(dev); + if (ret < 0) { + pr_err("tile_net_open_aux failed: %d\n", ret); + return ret; + } + + /* Analyze the network cpus. */ + + if (network_cpus_used) + cpumask_copy(&priv->network_cpus_map, + &network_cpus_map); + else + cpumask_copy(&priv->network_cpus_map, cpu_online_mask); + + + count = cpumask_weight(&priv->network_cpus_map); + + /* Limit credits to available buffers, and apply min. */ + credits = max(16, (LIPP_LARGE_BUFFERS / count) & ~1); + + /* Apply "GBE" max limit. */ + /* ISSUE: Use higher limit for XGBE? 
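+ * For now we clamp every interface to NETIO_MAX_RECEIVE_PKTS.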
*/
+ credits = min(NETIO_MAX_RECEIVE_PKTS, credits);
+
+ priv->network_cpus_count = count;
+ priv->network_cpus_credits = credits;
+
+#ifdef TILE_NET_DEBUG
+ pr_info("Using %d network cpus, with %d credits each\n",
+ priv->network_cpus_count, priv->network_cpus_credits);
+#endif
+
+ priv->partly_opened = 1;
+ }
+
+ /*
+ * Attempt to bring up the link.
+ */
+ ret = tile_net_open_inner(dev);
+ if (ret <= 0) {
+ if (ret == 0)
+ netif_carrier_on(dev);
+ return ret;
+ }
+
+ /*
+ * We were unable to bring up the NetIO interface, but we want to
+ * try again in a little bit. Tell Linux that we have no carrier
+ * so it doesn't try to use the interface before the link comes up
+ * and then remember to try again later.
+ */
+ netif_carrier_off(dev);
+ schedule_delayed_work_on(singlethread_cpu, &priv->retry_work,
+ TILE_NET_RETRY_INTERVAL);
+
+ return 0;
+}
+
+
+/*
+ * Disables a network interface.
+ *
+ * Returns 0; this is not allowed to fail.
+ *
+ * The close entry point is called when an interface is de-activated
+ * by the OS. The hardware is still under the driver's control, but
+ * needs to be disabled. A global MAC reset is issued to stop the
+ * hardware, and all transmit and receive resources are freed.
+ *
+ * ISSUE: Can this be called while "tile_net_poll()" is running?
+ */
+static int tile_net_stop(struct net_device *dev)
+{
+ struct tile_net_priv *priv = netdev_priv(dev);
+
+ bool pending = true;
+
+ PDEBUG("tile_net_stop()\n");
+
+ /* ISSUE: Only needed if not yet fully open. */
+ cancel_delayed_work_sync(&priv->retry_work);
+
+ /* Can't transmit any more. */
+ netif_stop_queue(dev);
+
+ /*
+ * Disable hypervisor interrupts on each tile.
+ */
+ on_each_cpu(tile_net_disable_intr, (void *)dev, 1);
+
+ /*
+ * Unregister the interrupt handler.
+ * The __ffs() function returns the index into the interrupt handler
+ * table from the interrupt bit mask which should have one bit
+ * and one bit only set.
+ */
+ if (priv->intr_id)
+ free_irq(__ffs(priv->intr_id), dev);
+
+ /*
+ * Drain all the LIPP buffers.
+ */
+
+ while (true) {
+ int buffer;
+
+ /* NOTE: This should never fail. */
+ if (hv_dev_pread(priv->hv_devhdl, 0, (HV_VirtAddr)&buffer,
+ sizeof(buffer), NETIO_IPP_DRAIN_OFF) < 0)
+ break;
+
+ /* Stop when done. */
+ if (buffer == 0)
+ break;
+
+ {
+ /* Convert "linux_buffer_t" to "va". */
+ void *va = __va((phys_addr_t)(buffer >> 1) << 7);
+
+ /* Acquire the associated "skb". */
+ struct sk_buff **skb_ptr = va - sizeof(*skb_ptr);
+ struct sk_buff *skb = *skb_ptr;
+
+ kfree_skb(skb);
+ }
+ }
+
+ /* Stop LIPP/LEPP. */
+ tile_net_stop_aux(dev);
+
+
+ priv->fully_opened = 0;
+
+
+ /*
+ * XXX: ISSUE: It appears that, in practice anyway, by the
+ * time we get here, there are no pending completions.
+ */
+ while (pending) {
+
+ struct sk_buff *olds[32];
+ unsigned int wanted = 32;
+ unsigned int i, nolds = 0;
+
+ nolds = tile_net_lepp_grab_comps(dev, olds,
+ wanted, &pending);
+
+ /* ISSUE: We have never actually seen this debug spew. */
+ if (nolds != 0)
+ pr_info("During tile_net_stop(), grabbed %d comps.\n",
+ nolds);
+
+ for (i = 0; i < nolds; i++)
+ kfree_skb(olds[i]);
+ }
+
+
+ /* Wipe the EPP queue. */
+ memset(priv->epp_queue, 0, sizeof(lepp_queue_t));
+
+ /* Evict the EPP queue. */
+ finv_buffer(priv->epp_queue, PAGE_SIZE);
+
+ return 0;
+}
+
+
+/*
+ * Prepare the "frags" info for the resulting LEPP command.
+ *
+ * If needed, flush the memory used by the frags.
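+ * (When not using hash-for-home pages, each piece of payload is
+ * flushed from the local cache with finv_buffer_remote() so that
+ * the shim sees the data in memory.)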
+ */ +static unsigned int tile_net_tx_frags(lepp_frag_t *frags, + struct sk_buff *skb, + void *b_data, unsigned int b_len) +{ + unsigned int i, n = 0; + + struct skb_shared_info *sh = skb_shinfo(skb); + + phys_addr_t cpa; + + if (b_len != 0) { + + if (!hash_default) + finv_buffer_remote(b_data, b_len); + + cpa = __pa(b_data); + frags[n].cpa_lo = cpa; + frags[n].cpa_hi = cpa >> 32; + frags[n].length = b_len; + frags[n].hash_for_home = hash_default; + n++; + } + + for (i = 0; i < sh->nr_frags; i++) { + + skb_frag_t *f = &sh->frags[i]; + unsigned long pfn = page_to_pfn(f->page); + + /* FIXME: Compute "hash_for_home" properly. */ + /* ISSUE: The hypervisor checks CHIP_HAS_REV1_DMA_PACKETS(). */ + int hash_for_home = hash_default; + + /* FIXME: Hmmm. */ + if (!hash_default) { + void *va = pfn_to_kaddr(pfn) + f->page_offset; + BUG_ON(PageHighMem(f->page)); + finv_buffer_remote(va, f->size); + } + + cpa = ((phys_addr_t)pfn << PAGE_SHIFT) + f->page_offset; + frags[n].cpa_lo = cpa; + frags[n].cpa_hi = cpa >> 32; + frags[n].length = f->size; + frags[n].hash_for_home = hash_for_home; + n++; + } + + return n; +} + + +/* + * This function takes "skb", consisting of a header template and a + * payload, and hands it to LEPP, to emit as one or more segments, + * each consisting of a possibly modified header, plus a piece of the + * payload, via a process known as "tcp segmentation offload". + * + * Usually, "data" will contain the header template, of size "sh_len", + * and "sh->frags" will contain "skb->data_len" bytes of payload, and + * there will be "sh->gso_segs" segments. + * + * Sometimes, if "sendfile()" requires copying, we will be called with + * "data" containing the header and payload, with "frags" being empty. + * + * In theory, "sh->nr_frags" could be 3, but in practice, it seems + * that this will never actually happen. + * + * See "emulate_large_send_offload()" for some reference code, which + * does not handle checksumming. + * + * ISSUE: How do we make sure that high memory DMA does not migrate? + */ +static int tile_net_tx_tso(struct sk_buff *skb, struct net_device *dev) +{ + struct tile_net_priv *priv = netdev_priv(dev); + int my_cpu = smp_processor_id(); + struct tile_net_cpu *info = priv->cpu[my_cpu]; + struct tile_net_stats_t *stats = &info->stats; + + struct skb_shared_info *sh = skb_shinfo(skb); + + unsigned char *data = skb->data; + + /* The ip header follows the ethernet header. */ + struct iphdr *ih = ip_hdr(skb); + unsigned int ih_len = ih->ihl * 4; + + /* Note that "nh == ih", by definition. */ + unsigned char *nh = skb_network_header(skb); + unsigned int eh_len = nh - data; + + /* The tcp header follows the ip header. */ + struct tcphdr *th = (struct tcphdr *)(nh + ih_len); + unsigned int th_len = th->doff * 4; + + /* The total number of header bytes. */ + /* NOTE: This may be less than skb_headlen(skb). */ + unsigned int sh_len = eh_len + ih_len + th_len; + + /* The number of payload bytes at "skb->data + sh_len". */ + /* This is non-zero for sendfile() without HIGHDMA. */ + unsigned int b_len = skb_headlen(skb) - sh_len; + + /* The total number of payload bytes. */ + unsigned int d_len = b_len + skb->data_len; + + /* The maximum payload size. */ + unsigned int p_len = sh->gso_size; + + /* The total number of segments. */ + unsigned int num_segs = sh->gso_segs; + + /* The temporary copy of the command. */ + u32 cmd_body[(LEPP_MAX_CMD_SIZE + 3) / 4]; + lepp_tso_cmd_t *cmd = (lepp_tso_cmd_t *)cmd_body; + + /* Analyze the "frags". 
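+ * This fills in the frags array from the skb, including any payload
+ * bytes that live in the linear area at "data + sh_len".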
*/ + unsigned int num_frags = + tile_net_tx_frags(cmd->frags, skb, data + sh_len, b_len); + + /* The size of the command, including frags and header. */ + size_t cmd_size = LEPP_TSO_CMD_SIZE(num_frags, sh_len); + + /* The command header. */ + lepp_tso_cmd_t cmd_init = { + .tso = true, + .header_size = sh_len, + .ip_offset = eh_len, + .tcp_offset = eh_len + ih_len, + .payload_size = p_len, + .num_frags = num_frags, + }; + + unsigned long irqflags; + + lepp_queue_t *eq = priv->epp_queue; + + struct sk_buff *olds[4]; + unsigned int wanted = 4; + unsigned int i, nolds = 0; + + unsigned int cmd_head, cmd_tail, cmd_next; + unsigned int comp_tail; + + unsigned int free_slots; + + + /* Paranoia. */ + BUG_ON(skb->protocol != htons(ETH_P_IP)); + BUG_ON(ih->protocol != IPPROTO_TCP); + BUG_ON(skb->ip_summed != CHECKSUM_PARTIAL); + BUG_ON(num_frags > LEPP_MAX_FRAGS); + /*--BUG_ON(num_segs != (d_len + (p_len - 1)) / p_len); */ + BUG_ON(num_segs <= 1); + + + /* Finish preparing the command. */ + + /* Copy the command header. */ + *cmd = cmd_init; + + /* Copy the "header". */ + memcpy(&cmd->frags[num_frags], data, sh_len); + + + /* Prefetch and wait, to minimize time spent holding the spinlock. */ + prefetch_L1(&eq->comp_tail); + prefetch_L1(&eq->cmd_tail); + mb(); + + + /* Enqueue the command. */ + + spin_lock_irqsave(&priv->cmd_lock, irqflags); + + /* + * Handle completions if needed to make room. + * HACK: Spin until there is sufficient room. + */ + free_slots = lepp_num_free_comp_slots(eq); + if (free_slots < 1) { +spin: + nolds += tile_net_lepp_grab_comps(dev, olds + nolds, + wanted - nolds, NULL); + if (lepp_num_free_comp_slots(eq) < 1) + goto spin; + } + + cmd_head = eq->cmd_head; + cmd_tail = eq->cmd_tail; + + /* NOTE: The "gotos" below are untested. */ + + /* Prepare to advance, detecting full queue. */ + cmd_next = cmd_tail + cmd_size; + if (cmd_tail < cmd_head && cmd_next >= cmd_head) + goto spin; + if (cmd_next > LEPP_CMD_LIMIT) { + cmd_next = 0; + if (cmd_next == cmd_head) + goto spin; + } + + /* Copy the command. */ + memcpy(&eq->cmds[cmd_tail], cmd, cmd_size); + + /* Advance. */ + cmd_tail = cmd_next; + + /* Record "skb" for eventual freeing. */ + comp_tail = eq->comp_tail; + eq->comps[comp_tail] = skb; + LEPP_QINC(comp_tail); + eq->comp_tail = comp_tail; + + /* Flush before allowing LEPP to handle the command. */ + __insn_mf(); + + eq->cmd_tail = cmd_tail; + + spin_unlock_irqrestore(&priv->cmd_lock, irqflags); + + if (nolds == 0) + nolds = tile_net_lepp_grab_comps(dev, olds, wanted, NULL); + + /* Handle completions. */ + for (i = 0; i < nolds; i++) + kfree_skb(olds[i]); + + /* Update stats. */ + stats->tx_packets += num_segs; + stats->tx_bytes += (num_segs * sh_len) + d_len; + + /* Make sure the egress timer is scheduled. */ + tile_net_schedule_egress_timer(info); + + return NETDEV_TX_OK; +} + + +/* + * Transmit a packet (called by the kernel via "hard_start_xmit" hook). 
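+ * One LEPP command is built per fragment; a completion slot is
+ * reserved so the skb can be freed once LEPP has consumed the
+ * commands, and the egress timer is kicked to reap completions.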
+ */ +static int tile_net_tx(struct sk_buff *skb, struct net_device *dev) +{ + struct tile_net_priv *priv = netdev_priv(dev); + int my_cpu = smp_processor_id(); + struct tile_net_cpu *info = priv->cpu[my_cpu]; + struct tile_net_stats_t *stats = &info->stats; + + unsigned long irqflags; + + struct skb_shared_info *sh = skb_shinfo(skb); + + unsigned int len = skb->len; + unsigned char *data = skb->data; + + unsigned int csum_start = skb->csum_start - skb_headroom(skb); + + lepp_frag_t frags[LEPP_MAX_FRAGS]; + + unsigned int num_frags; + + lepp_queue_t *eq = priv->epp_queue; + + struct sk_buff *olds[4]; + unsigned int wanted = 4; + unsigned int i, nolds = 0; + + unsigned int cmd_size = sizeof(lepp_cmd_t); + + unsigned int cmd_head, cmd_tail, cmd_next; + unsigned int comp_tail; + + lepp_cmd_t cmds[LEPP_MAX_FRAGS]; + + unsigned int free_slots; + + + /* + * This is paranoia, since we think that if the link doesn't come + * up, telling Linux we have no carrier will keep it from trying + * to transmit. If it does, though, we can't execute this routine, + * since data structures we depend on aren't set up yet. + */ + if (!info->registered) + return NETDEV_TX_BUSY; + + + /* Save the timestamp. */ + dev->trans_start = jiffies; + + +#ifdef TILE_NET_PARANOIA +#if CHIP_HAS_CBOX_HOME_MAP() + if (hash_default) { + HV_PTE pte = *virt_to_pte(current->mm, (unsigned long)data); + if (hv_pte_get_mode(pte) != HV_PTE_MODE_CACHE_HASH_L3) + panic("Non-coherent egress buffer!"); + } +#endif +#endif + + +#ifdef TILE_NET_DUMP_PACKETS + /* ISSUE: Does not dump the "frags". */ + dump_packet(data, skb_headlen(skb), "tx"); +#endif /* TILE_NET_DUMP_PACKETS */ + + + if (sh->gso_size != 0) + return tile_net_tx_tso(skb, dev); + + + /* Prepare the commands. */ + + num_frags = tile_net_tx_frags(frags, skb, data, skb_headlen(skb)); + + for (i = 0; i < num_frags; i++) { + + bool final = (i == num_frags - 1); + + lepp_cmd_t cmd = { + .cpa_lo = frags[i].cpa_lo, + .cpa_hi = frags[i].cpa_hi, + .length = frags[i].length, + .hash_for_home = frags[i].hash_for_home, + .send_completion = final, + .end_of_packet = final + }; + + if (i == 0 && skb->ip_summed == CHECKSUM_PARTIAL) { + cmd.compute_checksum = 1; + cmd.checksum_data.bits.start_byte = csum_start; + cmd.checksum_data.bits.count = len - csum_start; + cmd.checksum_data.bits.destination_byte = + csum_start + skb->csum_offset; + } + + cmds[i] = cmd; + } + + + /* Prefetch and wait, to minimize time spent holding the spinlock. */ + prefetch_L1(&eq->comp_tail); + prefetch_L1(&eq->cmd_tail); + mb(); + + + /* Enqueue the commands. */ + + spin_lock_irqsave(&priv->cmd_lock, irqflags); + + /* + * Handle completions if needed to make room. + * HACK: Spin until there is sufficient room. + */ + free_slots = lepp_num_free_comp_slots(eq); + if (free_slots < 1) { +spin: + nolds += tile_net_lepp_grab_comps(dev, olds + nolds, + wanted - nolds, NULL); + if (lepp_num_free_comp_slots(eq) < 1) + goto spin; + } + + cmd_head = eq->cmd_head; + cmd_tail = eq->cmd_tail; + + /* NOTE: The "gotos" below are untested. */ + + /* Copy the commands, or fail. */ + for (i = 0; i < num_frags; i++) { + + /* Prepare to advance, detecting full queue. */ + cmd_next = cmd_tail + cmd_size; + if (cmd_tail < cmd_head && cmd_next >= cmd_head) + goto spin; + if (cmd_next > LEPP_CMD_LIMIT) { + cmd_next = 0; + if (cmd_next == cmd_head) + goto spin; + } + + /* Copy the command. */ + *(lepp_cmd_t *)&eq->cmds[cmd_tail] = cmds[i]; + + /* Advance. */ + cmd_tail = cmd_next; + } + + /* Record "skb" for eventual freeing. 
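+ * The skb must stay alive until the shim reports the commands
+ * complete; it is freed later by tile_net_lepp_grab_comps().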
*/ + comp_tail = eq->comp_tail; + eq->comps[comp_tail] = skb; + LEPP_QINC(comp_tail); + eq->comp_tail = comp_tail; + + /* Flush before allowing LEPP to handle the command. */ + __insn_mf(); + + eq->cmd_tail = cmd_tail; + + spin_unlock_irqrestore(&priv->cmd_lock, irqflags); + + if (nolds == 0) + nolds = tile_net_lepp_grab_comps(dev, olds, wanted, NULL); + + /* Handle completions. */ + for (i = 0; i < nolds; i++) + kfree_skb(olds[i]); + + /* HACK: Track "expanded" size for short packets (e.g. 42 < 60). */ + stats->tx_packets++; + stats->tx_bytes += ((len >= ETH_ZLEN) ? len : ETH_ZLEN); + + /* Make sure the egress timer is scheduled. */ + tile_net_schedule_egress_timer(info); + + return NETDEV_TX_OK; +} + + +/* + * Deal with a transmit timeout. + */ +static void tile_net_tx_timeout(struct net_device *dev) +{ + PDEBUG("tile_net_tx_timeout()\n"); + PDEBUG("Transmit timeout at %ld, latency %ld\n", jiffies, + jiffies - dev->trans_start); + + /* XXX: ISSUE: This doesn't seem useful for us. */ + netif_wake_queue(dev); +} + + +/* + * Ioctl commands. + */ +static int tile_net_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) +{ + return -EOPNOTSUPP; +} + + +/* + * Get System Network Statistics. + * + * Returns the address of the device statistics structure. + */ +static struct net_device_stats *tile_net_get_stats(struct net_device *dev) +{ + struct tile_net_priv *priv = netdev_priv(dev); + u32 rx_packets = 0; + u32 tx_packets = 0; + u32 rx_bytes = 0; + u32 tx_bytes = 0; + int i; + + for_each_online_cpu(i) { + if (priv->cpu[i]) { + rx_packets += priv->cpu[i]->stats.rx_packets; + rx_bytes += priv->cpu[i]->stats.rx_bytes; + tx_packets += priv->cpu[i]->stats.tx_packets; + tx_bytes += priv->cpu[i]->stats.tx_bytes; + } + } + + priv->stats.rx_packets = rx_packets; + priv->stats.rx_bytes = rx_bytes; + priv->stats.tx_packets = tx_packets; + priv->stats.tx_bytes = tx_bytes; + + return &priv->stats; +} + + +/* + * Change the "mtu". + * + * The "change_mtu" method is usually not needed. + * If you need it, it must be like this. + */ +static int tile_net_change_mtu(struct net_device *dev, int new_mtu) +{ + PDEBUG("tile_net_change_mtu()\n"); + + /* Check ranges. */ + if ((new_mtu < 68) || (new_mtu > 1500)) + return -EINVAL; + + /* Accept the value. */ + dev->mtu = new_mtu; + + return 0; +} + + +/* + * Change the Ethernet Address of the NIC. + * + * The hypervisor driver does not support changing MAC address. However, + * the IPP does not do anything with the MAC address, so the address which + * gets used on outgoing packets, and which is accepted on incoming packets, + * is completely up to the NetIO program or kernel driver which is actually + * handling them. + * + * Returns 0 on success, negative on failure. + */ +static int tile_net_set_mac_address(struct net_device *dev, void *p) +{ + struct sockaddr *addr = p; + + if (!is_valid_ether_addr(addr->sa_data)) + return -EINVAL; + + /* ISSUE: Note that "dev_addr" is now a pointer. */ + memcpy(dev->dev_addr, addr->sa_data, dev->addr_len); + + return 0; +} + + +/* + * Obtain the MAC address from the hypervisor. + * This must be done before opening the device. + */ +static int tile_net_get_mac(struct net_device *dev) +{ + struct tile_net_priv *priv = netdev_priv(dev); + + char hv_dev_name[32]; + int len; + + __netio_getset_offset_t offset = { .word = NETIO_IPP_PARAM_OFF }; + + int ret; + + /* For example, "xgbe0". */ + strcpy(hv_dev_name, dev->name); + len = strlen(hv_dev_name); + + /* For example, "xgbe/0". 
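+ * (That is, the trailing digit is shifted past an inserted '/'.)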
*/ + hv_dev_name[len] = hv_dev_name[len - 1]; + hv_dev_name[len - 1] = '/'; + len++; + + /* For example, "xgbe/0/native_hash". */ + strcpy(hv_dev_name + len, hash_default ? "/native_hash" : "/native"); + + /* Get the hypervisor handle for this device. */ + priv->hv_devhdl = hv_dev_open((HV_VirtAddr)hv_dev_name, 0); + PDEBUG("hv_dev_open(%s) returned %d %p\n", + hv_dev_name, priv->hv_devhdl, &priv->hv_devhdl); + if (priv->hv_devhdl < 0) { + if (priv->hv_devhdl == HV_ENODEV) + printk(KERN_DEBUG "Ignoring unconfigured device %s\n", + hv_dev_name); + else + printk(KERN_DEBUG "hv_dev_open(%s) returned %d\n", + hv_dev_name, priv->hv_devhdl); + return -1; + } + + /* + * Read the hardware address from the hypervisor. + * ISSUE: Note that "dev_addr" is now a pointer. + */ + offset.bits.class = NETIO_PARAM; + offset.bits.addr = NETIO_PARAM_MAC; + ret = hv_dev_pread(priv->hv_devhdl, 0, + (HV_VirtAddr)dev->dev_addr, dev->addr_len, + offset.word); + PDEBUG("hv_dev_pread(NETIO_PARAM_MAC) returned %d\n", ret); + if (ret <= 0) { + printk(KERN_DEBUG "hv_dev_pread(NETIO_PARAM_MAC) %s failed\n", + dev->name); + /* + * Since the device is configured by the hypervisor but we + * can't get its MAC address, we are most likely running + * the simulator, so let's generate a random MAC address. + */ + random_ether_addr(dev->dev_addr); + } + + return 0; +} + + +static struct net_device_ops tile_net_ops = { + .ndo_open = tile_net_open, + .ndo_stop = tile_net_stop, + .ndo_start_xmit = tile_net_tx, + .ndo_do_ioctl = tile_net_ioctl, + .ndo_get_stats = tile_net_get_stats, + .ndo_change_mtu = tile_net_change_mtu, + .ndo_tx_timeout = tile_net_tx_timeout, + .ndo_set_mac_address = tile_net_set_mac_address +}; + + +/* + * The setup function. + * + * This uses ether_setup() to assign various fields in dev, including + * setting IFF_BROADCAST and IFF_MULTICAST, then sets some extra fields. + */ +static void tile_net_setup(struct net_device *dev) +{ + PDEBUG("tile_net_setup()\n"); + + ether_setup(dev); + + dev->netdev_ops = &tile_net_ops; + + dev->watchdog_timeo = TILE_NET_TIMEOUT; + + /* We want lockless xmit. */ + dev->features |= NETIF_F_LLTX; + + /* We support hardware tx checksums. */ + dev->features |= NETIF_F_HW_CSUM; + + /* We support scatter/gather. */ + dev->features |= NETIF_F_SG; + + /* We support TSO. */ + dev->features |= NETIF_F_TSO; + +#ifdef TILE_NET_GSO + /* We support GSO. */ + dev->features |= NETIF_F_GSO; +#endif + + if (hash_default) + dev->features |= NETIF_F_HIGHDMA; + + /* ISSUE: We should support NETIF_F_UFO. */ + + dev->tx_queue_len = TILE_NET_TX_QUEUE_LEN; + + dev->mtu = TILE_NET_MTU; +} + + +/* + * Allocate the device structure, register the device, and obtain the + * MAC address from the hypervisor. + */ +static struct net_device *tile_net_dev_init(const char *name) +{ + int ret; + struct net_device *dev; + struct tile_net_priv *priv; + struct page *page; + + /* + * Allocate the device structure. This allocates "priv", calls + * tile_net_setup(), and saves "name". Normally, "name" is a + * template, instantiated by register_netdev(), but not for us. + */ + dev = alloc_netdev(sizeof(*priv), name, tile_net_setup); + if (!dev) { + pr_err("alloc_netdev(%s) failed\n", name); + return NULL; + } + + priv = netdev_priv(dev); + + /* Initialize "priv". */ + + memset(priv, 0, sizeof(*priv)); + + /* Save "dev" for "tile_net_open_retry()". 
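+ * The retry work item only receives the delayed_work pointer,
+ * so it needs this back-pointer to recover the net_device.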
*/
+ priv->dev = dev;
+
+ INIT_DELAYED_WORK(&priv->retry_work, tile_net_open_retry);
+
+ spin_lock_init(&priv->cmd_lock);
+ spin_lock_init(&priv->comp_lock);
+
+ /* Allocate "epp_queue". */
+ BUG_ON(get_order(sizeof(lepp_queue_t)) != 0);
+ page = alloc_pages(GFP_KERNEL | __GFP_ZERO, 0);
+ if (!page) {
+ free_netdev(dev);
+ return NULL;
+ }
+ priv->epp_queue = page_address(page);
+
+ /* Register the network device. */
+ ret = register_netdev(dev);
+ if (ret) {
+ pr_err("register_netdev %s failed %d\n", dev->name, ret);
+ free_page((unsigned long)priv->epp_queue);
+ free_netdev(dev);
+ return NULL;
+ }
+
+ /* Get the MAC address. */
+ ret = tile_net_get_mac(dev);
+ if (ret < 0) {
+ unregister_netdev(dev);
+ free_page((unsigned long)priv->epp_queue);
+ free_netdev(dev);
+ return NULL;
+ }
+
+ return dev;
+}
+
+
+/*
+ * Module cleanup.
+ */
+static void tile_net_cleanup(void)
+{
+ int i;
+
+ for (i = 0; i < TILE_NET_DEVS; i++) {
+ if (tile_net_devs[i]) {
+ struct net_device *dev = tile_net_devs[i];
+ struct tile_net_priv *priv = netdev_priv(dev);
+ unregister_netdev(dev);
+ finv_buffer(priv->epp_queue, PAGE_SIZE);
+ free_page((unsigned long)priv->epp_queue);
+ free_netdev(dev);
+ }
+ }
+}
+
+
+/*
+ * Module initialization.
+ */
+static int tile_net_init_module(void)
+{
+ pr_info("Tilera IPP Net Driver\n");
+
+ tile_net_devs[0] = tile_net_dev_init("xgbe0");
+ tile_net_devs[1] = tile_net_dev_init("xgbe1");
+ tile_net_devs[2] = tile_net_dev_init("gbe0");
+ tile_net_devs[3] = tile_net_dev_init("gbe1");
+
+ return 0;
+}
+
+
+#ifndef MODULE
+/*
+ * The "network_cpus" boot argument specifies the cpus that are dedicated
+ * to handle ingress packets.
+ *
+ * The parameter should be in the form "network_cpus=m-n[,x-y]", where
+ * m, n, x, y are integer numbers that represent the cpus that can be
+ * neither a dedicated cpu nor a dataplane cpu.
+ */
+static int __init network_cpus_setup(char *str)
+{
+ int rc = cpulist_parse_crop(str, &network_cpus_map);
+ if (rc != 0) {
+ pr_warning("network_cpus=%s: malformed cpu list\n",
+ str);
+ } else {
+
+ /* Remove dedicated cpus. */
+ cpumask_and(&network_cpus_map, &network_cpus_map,
+ cpu_possible_mask);
+
+
+ if (cpumask_empty(&network_cpus_map)) {
+ pr_warning("Ignoring network_cpus='%s'.\n",
+ str);
+ } else {
+ char buf[1024];
+ cpulist_scnprintf(buf, sizeof(buf), &network_cpus_map);
+ pr_info("Linux network CPUs: %s\n", buf);
+ network_cpus_used = true;
+ }
+ }
+
+ return 0;
+}
+__setup("network_cpus=", network_cpus_setup);
+#endif
+
+
+module_init(tile_net_init_module);
+module_exit(tile_net_cleanup);
--
cgit v1.2.3


From f02cbbe657939489347cbda598401a56913ffcbd Mon Sep 17 00:00:00 2001
From: Chris Metcalf
Date: Tue, 2 Nov 2010 12:05:10 -0400
Subject: pci root complex: support for tile architecture

This change enables PCI root complex support for TILEPro. Unlike
TILE-Gx, TILEPro has no support for memory-mapped I/O, so the PCI
support consists of hypervisor upcalls for PIO, DMA, etc. However, the
performance is fine for the devices we have tested with so far
(1Gb Ethernet, SATA, etc.).

The io.h header was tweaked to be a little bit more aggressive about
disabling attempts to map/unmap IO port space. The hacky pci-bridge.h
header was rolled into the pci.h header and the result was simplified.
Both of the latter two headers were preliminary versions not meant for
release before now - oh well.

There is one quirk for our TILEmpower platform, which accidentally
negotiates up to 5GT and needs to be kicked down to 2.5GT.
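To illustrate the model (a sketch only, not code from this patch, and
tile_mmio_read32 is a hypothetical helper name): with no MMIO, a 32-bit
PCI memory read becomes a pread on the controller's hypervisor "mem"
file descriptor, mirroring the TILE_READ()/TILE_WRITE() helpers added
in arch/tile/kernel/pci.c:

        /* Illustrative sketch only: how an MMIO read turns into a
         * hypervisor upcall on TILEPro. */
        u32 tile_mmio_read32(struct pci_controller *controller,
                             unsigned long addr)
        {
                u32 val;

                if (hv_dev_pread(controller->hv_mem_fd, 0,
                                 (HV_VirtAddr)&val, sizeof(val), addr) < 0)
                        pr_err("PCI: read at %#lx failed\n", addr);
                return val;
        }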
Signed-off-by: Chris Metcalf --- arch/tile/Kconfig | 12 + arch/tile/include/asm/io.h | 15 +- arch/tile/include/asm/pci-bridge.h | 117 ------- arch/tile/include/asm/pci.h | 107 +++---- arch/tile/kernel/Makefile | 1 + arch/tile/kernel/pci.c | 621 +++++++++++++++++++++++++++++++++++++ drivers/pci/Makefile | 1 + drivers/pci/quirks.c | 18 ++ 8 files changed, 705 insertions(+), 187 deletions(-) delete mode 100644 arch/tile/include/asm/pci-bridge.h create mode 100644 arch/tile/kernel/pci.c diff --git a/arch/tile/Kconfig b/arch/tile/Kconfig index 07ec8a865c1d..e11b5fcb70eb 100644 --- a/arch/tile/Kconfig +++ b/arch/tile/Kconfig @@ -329,6 +329,18 @@ endmenu # Tilera-specific configuration menu "Bus options" +config PCI + bool "PCI support" + default y + select PCI_DOMAINS + ---help--- + Enable PCI root complex support, so PCIe endpoint devices can + be attached to the Tile chip. Many, but not all, PCI devices + are supported under Tilera's root complex driver. + +config PCI_DOMAINS + bool + config NO_IOMEM def_bool !PCI diff --git a/arch/tile/include/asm/io.h b/arch/tile/include/asm/io.h index ee43328713ab..d3cbb9b14cbe 100644 --- a/arch/tile/include/asm/io.h +++ b/arch/tile/include/asm/io.h @@ -55,9 +55,6 @@ extern void iounmap(volatile void __iomem *addr); #define ioremap_writethrough(physaddr, size) ioremap(physaddr, size) #define ioremap_fullcache(physaddr, size) ioremap(physaddr, size) -void __iomem *ioport_map(unsigned long port, unsigned int len); -extern inline void ioport_unmap(void __iomem *addr) {} - #define mmiowb() /* Conversion between virtual and physical mappings. */ @@ -189,12 +186,22 @@ static inline void memcpy_toio(volatile void __iomem *dst, const void *src, * we never run, uses them unconditionally. */ -static inline int ioport_panic(void) +static inline long ioport_panic(void) { panic("inb/outb and friends do not exist on tile"); return 0; } +static inline void __iomem *ioport_map(unsigned long port, unsigned int len) +{ + return (void __iomem *) ioport_panic(); +} + +static inline void ioport_unmap(void __iomem *addr) +{ + ioport_panic(); +} + static inline u8 inb(unsigned long addr) { return ioport_panic(); diff --git a/arch/tile/include/asm/pci-bridge.h b/arch/tile/include/asm/pci-bridge.h deleted file mode 100644 index e853b0e2793b..000000000000 --- a/arch/tile/include/asm/pci-bridge.h +++ /dev/null @@ -1,117 +0,0 @@ -/* - * Copyright 2010 Tilera Corporation. All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation, version 2. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or - * NON INFRINGEMENT. See the GNU General Public License for - * more details. - */ - -#ifndef _ASM_TILE_PCI_BRIDGE_H -#define _ASM_TILE_PCI_BRIDGE_H - -#include -#include - -struct device_node; -struct pci_controller; - -/* - * pci_io_base returns the memory address at which you can access - * the I/O space for PCI bus number `bus' (or NULL on error). 
- */ -extern void __iomem *pci_bus_io_base(unsigned int bus); -extern unsigned long pci_bus_io_base_phys(unsigned int bus); -extern unsigned long pci_bus_mem_base_phys(unsigned int bus); - -/* Allocate a new PCI host bridge structure */ -extern struct pci_controller *pcibios_alloc_controller(void); - -/* Helper function for setting up resources */ -extern void pci_init_resource(struct resource *res, unsigned long start, - unsigned long end, int flags, char *name); - -/* Get the PCI host controller for a bus */ -extern struct pci_controller *pci_bus_to_hose(int bus); - -/* - * Structure of a PCI controller (host bridge) - */ -struct pci_controller { - int index; /* PCI domain number */ - struct pci_bus *root_bus; - - int first_busno; - int last_busno; - - int hv_cfg_fd[2]; /* config{0,1} fds for this PCIe controller */ - int hv_mem_fd; /* fd to Hypervisor for MMIO operations */ - - struct pci_ops *ops; - - int irq_base; /* Base IRQ from the Hypervisor */ - int plx_gen1; /* flag for PLX Gen 1 configuration */ - - /* Address ranges that are routed to this controller/bridge. */ - struct resource mem_resources[3]; -}; - -static inline struct pci_controller *pci_bus_to_host(struct pci_bus *bus) -{ - return bus->sysdata; -} - -extern void setup_indirect_pci_nomap(struct pci_controller *hose, - void __iomem *cfg_addr, void __iomem *cfg_data); -extern void setup_indirect_pci(struct pci_controller *hose, - u32 cfg_addr, u32 cfg_data); -extern void setup_grackle(struct pci_controller *hose); - -extern unsigned char common_swizzle(struct pci_dev *, unsigned char *); - -/* - * The following code swizzles for exactly one bridge. The routine - * common_swizzle below handles multiple bridges. But there are a - * some boards that don't follow the PCI spec's suggestion so we - * break this piece out separately. - */ -static inline unsigned char bridge_swizzle(unsigned char pin, - unsigned char idsel) -{ - return (((pin-1) + idsel) % 4) + 1; -} - -/* - * The following macro is used to lookup irqs in a standard table - * format for those PPC systems that do not already have PCI - * interrupts properly routed. - */ -/* FIXME - double check this */ -#define PCI_IRQ_TABLE_LOOKUP ({ \ - long _ctl_ = -1; \ - if (idsel >= min_idsel && idsel <= max_idsel && pin <= irqs_per_slot) \ - _ctl_ = pci_irq_table[idsel - min_idsel][pin-1]; \ - _ctl_; \ -}) - -/* - * Scan the buses below a given PCI host bridge and assign suitable - * resources to all devices found. 
- */ -extern int pciauto_bus_scan(struct pci_controller *, int); - -#ifdef CONFIG_PCI -extern unsigned long pci_address_to_pio(phys_addr_t address); -#else -static inline unsigned long pci_address_to_pio(phys_addr_t address) -{ - return (unsigned long)-1; -} -#endif - -#endif /* _ASM_TILE_PCI_BRIDGE_H */ diff --git a/arch/tile/include/asm/pci.h b/arch/tile/include/asm/pci.h index b0c15da2d5d5..c3fc458a0d32 100644 --- a/arch/tile/include/asm/pci.h +++ b/arch/tile/include/asm/pci.h @@ -15,7 +15,29 @@ #ifndef _ASM_TILE_PCI_H #define _ASM_TILE_PCI_H -#include +#include + +/* + * Structure of a PCI controller (host bridge) + */ +struct pci_controller { + int index; /* PCI domain number */ + struct pci_bus *root_bus; + + int first_busno; + int last_busno; + + int hv_cfg_fd[2]; /* config{0,1} fds for this PCIe controller */ + int hv_mem_fd; /* fd to Hypervisor for MMIO operations */ + + struct pci_ops *ops; + + int irq_base; /* Base IRQ from the Hypervisor */ + int plx_gen1; /* flag for PLX Gen 1 configuration */ + + /* Address ranges that are routed to this controller/bridge. */ + struct resource mem_resources[3]; +}; /* * The hypervisor maps the entirety of CPA-space as bus addresses, so @@ -24,56 +46,12 @@ */ #define PCI_DMA_BUS_IS_PHYS 1 -struct pci_controller *pci_bus_to_hose(int bus); -unsigned char __init common_swizzle(struct pci_dev *dev, unsigned char *pinp); int __init tile_pci_init(void); -void pci_iounmap(struct pci_dev *dev, void __iomem *addr); -void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long max); -void __devinit pcibios_fixup_bus(struct pci_bus *bus); -int __devinit _tile_cfg_read(struct pci_controller *hose, - int bus, - int slot, - int function, - int offset, - int size, - u32 *val); -int __devinit _tile_cfg_write(struct pci_controller *hose, - int bus, - int slot, - int function, - int offset, - int size, - u32 val); +void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long max); +static inline void pci_iounmap(struct pci_dev *dev, void __iomem *addr) {} -/* - * These are used to to config reads and writes in the early stages of - * setup before the driver infrastructure has been set up enough to be - * able to do config reads and writes. - */ -#define early_cfg_read(where, size, value) \ - _tile_cfg_read(controller, \ - current_bus, \ - pci_slot, \ - pci_fn, \ - where, \ - size, \ - value) - -#define early_cfg_write(where, size, value) \ - _tile_cfg_write(controller, \ - current_bus, \ - pci_slot, \ - pci_fn, \ - where, \ - size, \ - value) - - - -#define PCICFG_BYTE 1 -#define PCICFG_WORD 2 -#define PCICFG_DWORD 4 +void __devinit pcibios_fixup_bus(struct pci_bus *bus); #define TILE_NUM_PCIE 2 @@ -88,33 +66,33 @@ static inline int pci_proc_domain(struct pci_bus *bus) } /* - * I/O space is currently not supported. + * pcibios_assign_all_busses() tells whether or not the bus numbers + * should be reassigned, in case the BIOS didn't do it correctly, or + * in case we don't have a BIOS and we want to let Linux do it. */ +static inline int pcibios_assign_all_busses(void) +{ + return 1; +} -#define TILE_PCIE_LOWER_IO 0x0 -#define TILE_PCIE_UPPER_IO 0x10000 -#define TILE_PCIE_PCIE_IO_SIZE 0x0000FFFF - -#define _PAGE_NO_CACHE 0 -#define _PAGE_GUARDED 0 - - -#define pcibios_assign_all_busses() pci_assign_all_buses -extern int pci_assign_all_buses; - +/* + * No special bus mastering setup handling. 
+ */
 static inline void pcibios_set_master(struct pci_dev *dev)
 {
- /* No special bus mastering setup handling */
 }

 #define PCIBIOS_MIN_MEM 0
-#define PCIBIOS_MIN_IO TILE_PCIE_LOWER_IO
+#define PCIBIOS_MIN_IO 0

 /*
 * This flag tells if the platform is TILEmpower that needs
 * special configuration for the PLX switch chip.
 */
-extern int blade_pci;
+extern int tile_plx_gen1;
+
+/* Use any cpu for PCI. */
+#define cpumask_of_pcibus(bus) cpu_online_mask

 /* implement the pci_ DMA API in terms of the generic device dma_ one */
 #include

@@ -122,7 +100,4 @@ extern int blade_pci;
 /* generic pci stuff */
 #include

-/* Use any cpu for PCI. */
-#define cpumask_of_pcibus(bus) cpu_online_mask
-
 #endif /* _ASM_TILE_PCI_H */
diff --git a/arch/tile/kernel/Makefile b/arch/tile/kernel/Makefile
index 112b1e248f05..b4c8e8ec45dc 100644
--- a/arch/tile/kernel/Makefile
+++ b/arch/tile/kernel/Makefile
@@ -15,3 +15,4 @@ obj-$(CONFIG_SMP) += smpboot.o smp.o tlb.o
 obj-$(CONFIG_MODULES) += module.o
 obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
 obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o
+obj-$(CONFIG_PCI) += pci.o
diff --git a/arch/tile/kernel/pci.c b/arch/tile/kernel/pci.c
new file mode 100644
index 000000000000..a1ee25be9ad9
--- /dev/null
+++ b/arch/tile/kernel/pci.c
@@ -0,0 +1,621 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+
+
+/*
+ * Initialization flow and process
+ * -------------------------------
+ *
+ * This file contains the routines to search for PCI buses,
+ * enumerate the buses, and configure any attached devices.
+ *
+ * There are two entry points here:
+ * 1) tile_pci_init
+ * This sets up the pci_controller structs, and opens the
+ * FDs to the hypervisor. This is called from setup_arch() early
+ * in the boot process.
+ * 2) pcibios_init
+ * This probes the PCI bus(es) for any attached hardware. It's
+ * called by subsys_initcall. All of the real work is done by the
+ * generic Linux PCI layer.
+ *
+ */

+/*
+ * This flag tells if the platform is TILEmpower that needs
+ * special configuration for the PLX switch chip.
+ */
+int __write_once tile_plx_gen1;
+
+static struct pci_controller controllers[TILE_NUM_PCIE];
+static int num_controllers;
+
+static struct pci_ops tile_cfg_ops;
+
+
+/*
+ * We don't need to worry about the alignment of resources.
+ */
+resource_size_t pcibios_align_resource(void *data, const struct resource *res,
+ resource_size_t size, resource_size_t align)
+{
+ return res->start;
+}
+EXPORT_SYMBOL(pcibios_align_resource);
+
+/*
+ * Open an FD to the hypervisor PCI device.
+ *
+ * controller_id is the controller number; config_type is 0 or 1 for
+ * config0 or config1 operations.
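+ * Config0 accesses are used to talk to bus 0, directly below the
+ * root complex; config1 accesses are used once a bridge is in the
+ * path (see tile_cfg_read() below).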
+ */ +static int __init tile_pcie_open(int controller_id, int config_type) +{ + char filename[32]; + int fd; + + sprintf(filename, "pcie/%d/config%d", controller_id, config_type); + + fd = hv_dev_open((HV_VirtAddr)filename, 0); + + return fd; +} + + +/* + * Get the IRQ numbers from the HV and set up the handlers for them. + */ +static int __init tile_init_irqs(int controller_id, + struct pci_controller *controller) +{ + char filename[32]; + int fd; + int ret; + int x; + struct pcie_rc_config rc_config; + + sprintf(filename, "pcie/%d/ctl", controller_id); + fd = hv_dev_open((HV_VirtAddr)filename, 0); + if (fd < 0) { + pr_err("PCI: hv_dev_open(%s) failed\n", filename); + return -1; + } + ret = hv_dev_pread(fd, 0, (HV_VirtAddr)(&rc_config), + sizeof(rc_config), PCIE_RC_CONFIG_MASK_OFF); + hv_dev_close(fd); + if (ret != sizeof(rc_config)) { + pr_err("PCI: wanted %zd bytes, got %d\n", + sizeof(rc_config), ret); + return -1; + } + /* Record irq_base so that we can map INTx to IRQ # later. */ + controller->irq_base = rc_config.intr; + + for (x = 0; x < 4; x++) + tile_irq_activate(rc_config.intr + x, + TILE_IRQ_HW_CLEAR); + + if (rc_config.plx_gen1) + controller->plx_gen1 = 1; + + return 0; +} + +/* + * First initialization entry point, called from setup_arch(). + * + * Find valid controllers and fill in pci_controller structs for each + * of them. + * + * Returns the number of controllers discovered. + */ +int __init tile_pci_init(void) +{ + int i; + + pr_info("PCI: Searching for controllers...\n"); + + /* Do any configuration we need before using the PCIe */ + + for (i = 0; i < TILE_NUM_PCIE; i++) { + int hv_cfg_fd0 = -1; + int hv_cfg_fd1 = -1; + int hv_mem_fd = -1; + char name[32]; + struct pci_controller *controller; + + /* + * Open the fd to the HV. If it fails then this + * device doesn't exist. + */ + hv_cfg_fd0 = tile_pcie_open(i, 0); + if (hv_cfg_fd0 < 0) + continue; + hv_cfg_fd1 = tile_pcie_open(i, 1); + if (hv_cfg_fd1 < 0) { + pr_err("PCI: Couldn't open config fd to HV " + "for controller %d\n", i); + goto err_cont; + } + + sprintf(name, "pcie/%d/mem", i); + hv_mem_fd = hv_dev_open((HV_VirtAddr)name, 0); + if (hv_mem_fd < 0) { + pr_err("PCI: Could not open mem fd to HV!\n"); + goto err_cont; + } + + pr_info("PCI: Found PCI controller #%d\n", i); + + controller = &controllers[num_controllers]; + + if (tile_init_irqs(i, controller)) { + pr_err("PCI: Could not initialize " + "IRQs, aborting.\n"); + goto err_cont; + } + + controller->index = num_controllers; + controller->hv_cfg_fd[0] = hv_cfg_fd0; + controller->hv_cfg_fd[1] = hv_cfg_fd1; + controller->hv_mem_fd = hv_mem_fd; + controller->first_busno = 0; + controller->last_busno = 0xff; + controller->ops = &tile_cfg_ops; + + num_controllers++; + continue; + +err_cont: + if (hv_cfg_fd0 >= 0) + hv_dev_close(hv_cfg_fd0); + if (hv_cfg_fd1 >= 0) + hv_dev_close(hv_cfg_fd1); + if (hv_mem_fd >= 0) + hv_dev_close(hv_mem_fd); + continue; + } + + /* + * Before using the PCIe, see if we need to do any platform-specific + * configuration, such as the PLX switch Gen 1 issue on TILEmpower. + */ + for (i = 0; i < num_controllers; i++) { + struct pci_controller *controller = &controllers[i]; + + if (controller->plx_gen1) + tile_plx_gen1 = 1; + } + + return num_controllers; +} + +/* + * (pin - 1) converts from the PCI standard's [1:4] convention to + * a normal [0:3] range. 
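+ * The result is then offset by irq_base, which tile_init_irqs()
+ * obtained from the hypervisor for this controller.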
+ */ +static int tile_map_irq(struct pci_dev *dev, u8 slot, u8 pin) +{ + struct pci_controller *controller = + (struct pci_controller *)dev->sysdata; + return (pin - 1) + controller->irq_base; +} + + +static void __init fixup_read_and_payload_sizes(void) +{ + struct pci_dev *dev = NULL; + int smallest_max_payload = 0x1; /* Tile maxes out at 256 bytes. */ + int max_read_size = 0x2; /* Limit to 512 byte reads. */ + u16 new_values; + + /* Scan for the smallest maximum payload size. */ + while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { + int pcie_caps_offset; + u32 devcap; + int max_payload; + + pcie_caps_offset = pci_find_capability(dev, PCI_CAP_ID_EXP); + if (pcie_caps_offset == 0) + continue; + + pci_read_config_dword(dev, pcie_caps_offset + PCI_EXP_DEVCAP, + &devcap); + max_payload = devcap & PCI_EXP_DEVCAP_PAYLOAD; + if (max_payload < smallest_max_payload) + smallest_max_payload = max_payload; + } + + /* Now, set the max_payload_size for all devices to that value. */ + new_values = (max_read_size << 12) | (smallest_max_payload << 5); + while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { + int pcie_caps_offset; + u16 devctl; + + pcie_caps_offset = pci_find_capability(dev, PCI_CAP_ID_EXP); + if (pcie_caps_offset == 0) + continue; + + pci_read_config_word(dev, pcie_caps_offset + PCI_EXP_DEVCTL, + &devctl); + devctl &= ~(PCI_EXP_DEVCTL_PAYLOAD | PCI_EXP_DEVCTL_READRQ); + devctl |= new_values; + pci_write_config_word(dev, pcie_caps_offset + PCI_EXP_DEVCTL, + devctl); + } +} + + +/* + * Second PCI initialization entry point, called by subsys_initcall. + * + * The controllers have been set up by the time we get here, by a call to + * tile_pci_init. + */ +static int __init pcibios_init(void) +{ + int i; + + pr_info("PCI: Probing PCI hardware\n"); + + /* + * Delay a bit in case devices aren't ready. Some devices are + * known to require at least 20ms here, but we use a more + * conservative value. + */ + mdelay(250); + + /* Scan all of the recorded PCI controllers. */ + for (i = 0; i < num_controllers; i++) { + struct pci_controller *controller = &controllers[i]; + struct pci_bus *bus; + + pr_info("PCI: initializing controller #%d\n", i); + + /* + * This comes from the generic Linux PCI driver. + * + * It reads the PCI tree for this bus into the Linux + * data structures. + * + * This is inlined in linux/pci.h and calls into + * pci_scan_bus_parented() in probe.c. + */ + bus = pci_scan_bus(0, controller->ops, controller); + controller->root_bus = bus; + controller->last_busno = bus->subordinate; + + } + + /* Do machine dependent PCI interrupt routing */ + pci_fixup_irqs(pci_common_swizzle, tile_map_irq); + + /* + * This comes from the generic Linux PCI driver. + * + * It allocates all of the resources (I/O memory, etc) + * associated with the devices read in above. + */ + + pci_assign_unassigned_resources(); + + /* Configure the max_read_size and max_payload_size values. */ + fixup_read_and_payload_sizes(); + + /* Record the I/O resources in the PCI controller structure. */ + for (i = 0; i < num_controllers; i++) { + struct pci_bus *root_bus = controllers[i].root_bus; + struct pci_bus *next_bus; + struct pci_dev *dev; + + list_for_each_entry(dev, &root_bus->devices, bus_list) { + /* Find the PCI host controller, ie. the 1st bridge. 
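+ * Its secondary bus carries the memory windows that are
+ * copied into mem_resources[] just below.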
*/
+ if ((dev->class >> 8) == PCI_CLASS_BRIDGE_PCI &&
+ (PCI_SLOT(dev->devfn) == 0)) {
+ next_bus = dev->subordinate;
+ controllers[i].mem_resources[0] =
+ *next_bus->resource[0];
+ controllers[i].mem_resources[1] =
+ *next_bus->resource[1];
+ controllers[i].mem_resources[2] =
+ *next_bus->resource[2];
+
+ break;
+ }
+ }
+
+ }
+
+ return 0;
+}
+subsys_initcall(pcibios_init);
+
+/*
+ * No bus fixups needed.
+ */
+void __devinit pcibios_fixup_bus(struct pci_bus *bus)
+{
+ /* Nothing needs to be done. */
+}
+
+/*
+ * This can be called from the generic PCI layer, but doesn't need to
+ * do anything.
+ */
+char __devinit *pcibios_setup(char *str)
+{
+ /* Nothing needs to be done. */
+ return str;
+}
+
+/*
+ * This is called from the generic Linux layer.
+ */
+void __init pcibios_update_irq(struct pci_dev *dev, int irq)
+{
+ pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq);
+}
+
+/*
+ * Enable memory and/or address decoding, as appropriate, for the
+ * device described by the 'dev' struct.
+ *
+ * This is called from the generic PCI layer, and can be called
+ * for bridges or endpoints.
+ */
+int pcibios_enable_device(struct pci_dev *dev, int mask)
+{
+ u16 cmd, old_cmd;
+ u8 header_type;
+ int i;
+ struct resource *r;
+
+ pci_read_config_byte(dev, PCI_HEADER_TYPE, &header_type);
+
+ pci_read_config_word(dev, PCI_COMMAND, &cmd);
+ old_cmd = cmd;
+ if ((header_type & 0x7F) == PCI_HEADER_TYPE_BRIDGE) {
+ /*
+ * For bridges, we enable both memory and I/O decoding
+ * in all cases.
+ */
+ cmd |= PCI_COMMAND_IO;
+ cmd |= PCI_COMMAND_MEMORY;
+ } else {
+ /*
+ * For endpoints, we enable memory and/or I/O decoding
+ * only if they have a memory resource of that type.
+ */
+ for (i = 0; i < 6; i++) {
+ r = &dev->resource[i];
+ if (r->flags & IORESOURCE_UNSET) {
+ pr_err("PCI: Device %s not available "
+ "because of resource collisions\n",
+ pci_name(dev));
+ return -EINVAL;
+ }
+ if (r->flags & IORESOURCE_IO)
+ cmd |= PCI_COMMAND_IO;
+ if (r->flags & IORESOURCE_MEM)
+ cmd |= PCI_COMMAND_MEMORY;
+ }
+ }
+
+ /*
+ * We only write the command if it changed.
+ */
+ if (cmd != old_cmd)
+ pci_write_config_word(dev, PCI_COMMAND, cmd);
+ return 0;
+}
+
+void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long max)
+{
+ unsigned long start = pci_resource_start(dev, bar);
+ unsigned long len = pci_resource_len(dev, bar);
+ unsigned long flags = pci_resource_flags(dev, bar);
+
+ if (!len)
+ return NULL;
+ if (max && len > max)
+ len = max;
+
+ if (!(flags & IORESOURCE_MEM)) {
+ pr_info("PCI: Trying to map invalid resource %#lx\n", flags);
+ start = 0;
+ }
+
+ return (void __iomem *)start;
+}
+EXPORT_SYMBOL(pci_iomap);
+
+
+/****************************************************************
+ *
+ * Tile PCI config space read/write routines
+ *
+ ****************************************************************/
+
+/*
+ * These are the normal read and write ops.
+ * These are expanded with macros from pci_bus_read_config_byte() etc.
+ *
+ * devfn is the combined PCI slot & function.
+ *
+ * offset is in bytes, from the start of config space for the
+ * specified bus & slot.
+ */
+
+static int __devinit tile_cfg_read(struct pci_bus *bus,
+ unsigned int devfn,
+ int offset,
+ int size,
+ u32 *val)
+{
+ struct pci_controller *controller = bus->sysdata;
+ int busnum = bus->number & 0xff;
+ int slot = (devfn >> 3) & 0x1f;
+ int function = devfn & 0x7;
+ u32 addr;
+ int config_mode = 1;
+
+ /*
+ * There is no bridge between the Tile and bus 0, so we
+ * use config0 to talk to bus 0.
+ *
+ * If we're talking to a bus other than zero then we
+ * must have found a bridge.
+ */
+ if (busnum == 0) {
+ /*
+ * We fake an empty slot for (busnum == 0) && (slot > 0),
+ * since there is only one slot on bus 0.
+ */
+ if (slot) {
+ *val = 0xFFFFFFFF;
+ return 0;
+ }
+ config_mode = 0;
+ }
+
+ addr = busnum << 20; /* Bus in 27:20 */
+ addr |= slot << 15; /* Slot (device) in 19:15 */
+ addr |= function << 12; /* Function is in 14:12 */
+ addr |= (offset & 0xFFF); /* byte address in 0:11 */
+
+ return hv_dev_pread(controller->hv_cfg_fd[config_mode], 0,
+ (HV_VirtAddr)(val), size, addr);
+}
+
+
+/*
+ * See tile_cfg_read() for relevant comments.
+ * Note that "val" is the value to write, not a pointer to that value.
+ */
+static int __devinit tile_cfg_write(struct pci_bus *bus,
+ unsigned int devfn,
+ int offset,
+ int size,
+ u32 val)
+{
+ struct pci_controller *controller = bus->sysdata;
+ int busnum = bus->number & 0xff;
+ int slot = (devfn >> 3) & 0x1f;
+ int function = devfn & 0x7;
+ u32 addr;
+ int config_mode = 1;
+ HV_VirtAddr valp = (HV_VirtAddr)&val;
+
+ /*
+ * For bus 0 slot 0 we use config0 accesses.
+ */
+ if (busnum == 0) {
+ /*
+ * We fake an empty slot for (busnum == 0) && (slot > 0),
+ * since there is only one slot on bus 0.
+ */
+ if (slot)
+ return 0;
+ config_mode = 0;
+ }
+
+ addr = busnum << 20; /* Bus in 27:20 */
+ addr |= slot << 15; /* Slot (device) in 19:15 */
+ addr |= function << 12; /* Function is in 14:12 */
+ addr |= (offset & 0xFFF); /* byte address in 0:11 */
+
+#ifdef __BIG_ENDIAN
+ /* Point to the correct part of the 32-bit "val". */
+ valp += 4 - size;
+#endif
+
+ return hv_dev_pwrite(controller->hv_cfg_fd[config_mode], 0,
+ valp, size, addr);
+}
+
+
+static struct pci_ops tile_cfg_ops = {
+ .read = tile_cfg_read,
+ .write = tile_cfg_write,
+};
+
+
+/*
+ * In the following, each PCI controller's mem_resources[1]
+ * represents its (non-prefetchable) PCI memory resource.
+ * mem_resources[0] and mem_resources[2] refer to its PCI I/O and
+ * prefetchable PCI memory resources, respectively.
+ * For more details, see pci_setup_bridge() in setup-bus.c.
+ * By comparing the target PCI memory address against the
+ * end address of controller 0, we can determine the controller
+ * that should accept the PCI memory access.
+ */ +#define TILE_READ(size, type) \ +type _tile_read##size(unsigned long addr) \ +{ \ + type val; \ + int idx = 0; \ + if (addr > controllers[0].mem_resources[1].end && \ + addr > controllers[0].mem_resources[2].end) \ + idx = 1; \ + if (hv_dev_pread(controllers[idx].hv_mem_fd, 0, \ + (HV_VirtAddr)(&val), sizeof(type), addr)) \ + pr_err("PCI: read %zd bytes at 0x%lX failed\n", \ + sizeof(type), addr); \ + return val; \ +} \ +EXPORT_SYMBOL(_tile_read##size) + +TILE_READ(b, u8); +TILE_READ(w, u16); +TILE_READ(l, u32); +TILE_READ(q, u64); + +#define TILE_WRITE(size, type) \ +void _tile_write##size(type val, unsigned long addr) \ +{ \ + int idx = 0; \ + if (addr > controllers[0].mem_resources[1].end && \ + addr > controllers[0].mem_resources[2].end) \ + idx = 1; \ + if (hv_dev_pwrite(controllers[idx].hv_mem_fd, 0, \ + (HV_VirtAddr)(&val), sizeof(type), addr)) \ + pr_err("PCI: write %zd bytes at 0x%lX failed\n", \ + sizeof(type), addr); \ +} \ +EXPORT_SYMBOL(_tile_write##size) + +TILE_WRITE(b, u8); +TILE_WRITE(w, u16); +TILE_WRITE(l, u32); +TILE_WRITE(q, u64); diff --git a/drivers/pci/Makefile b/drivers/pci/Makefile index f01e344cf4bd..98e6fdf34d30 100644 --- a/drivers/pci/Makefile +++ b/drivers/pci/Makefile @@ -49,6 +49,7 @@ obj-$(CONFIG_MIPS) += setup-bus.o setup-irq.o obj-$(CONFIG_X86_VISWS) += setup-irq.o obj-$(CONFIG_MN10300) += setup-bus.o obj-$(CONFIG_MICROBLAZE) += setup-bus.o +obj-$(CONFIG_TILE) += setup-bus.o setup-irq.o # # ACPI Related PCI FW Functions diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index f5c63fe9db5c..6f9350cabbd5 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -2136,6 +2136,24 @@ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82865_HB, DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82875_HB, quirk_unhide_mch_dev6); +#ifdef CONFIG_TILE +/* + * The Tilera TILEmpower platform needs to set the link speed + * to 2.5GT(Giga-Transfers)/s (Gen 1). The default link speed + * setting is 5GT/s (Gen 2). 0x98 is the Link Control2 PCIe + * capability register of the PEX8624 PCIe switch. The switch + * supports link speed auto negotiation, but falsely sets + * the link speed to 5GT/s. + */ +static void __devinit quirk_tile_plx_gen1(struct pci_dev *dev) +{ + if (tile_plx_gen1) { + pci_write_config_dword(dev, 0x98, 0x1); + mdelay(50); + } +} +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_PLX, 0x8624, quirk_tile_plx_gen1); +#endif /* CONFIG_TILE */ #ifdef CONFIG_PCI_MSI /* Some chipsets do not support MSI. We cannot easily rely on setting -- cgit v1.2.3 From 4d658d13c90f14cf3510ca15cafe2f4aa9e23d64 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Wed, 24 Nov 2010 13:42:15 -0500 Subject: arch/tile: make glibc's sysconf(_SC_NPROCESSORS_CONF) work correctly glibc assumes that it can count /sys/devices/system/cpu/cpu* to get the number of configured cpus. For this to be valid on tile, we need to generate a "cpu" entry for all cpus, including the ones that are not currently allocated for Linux's use. 
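As a quick illustration (a hypothetical userspace check, not part of
this change), the configured count can be compared against the online
count; after this change the former reflects the full grid of cpus:

        #include <stdio.h>
        #include <unistd.h>

        int main(void)
        {
                /* glibc derives this by counting /sys/devices/system/cpu/cpu* */
                printf("configured: %ld\n", sysconf(_SC_NPROCESSORS_CONF));
                /* only the cpus that are actually online */
                printf("online:     %ld\n", sysconf(_SC_NPROCESSORS_ONLN));
                return 0;
        }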
Signed-off-by: Chris Metcalf
---
 arch/tile/kernel/setup.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/tile/kernel/setup.c b/arch/tile/kernel/setup.c
index fb0b3cbeae14..f18573643ed1 100644
--- a/arch/tile/kernel/setup.c
+++ b/arch/tile/kernel/setup.c
@@ -840,7 +840,7 @@ static int __init topology_init(void)
 for_each_online_node(i)
 register_one_node(i);

- for_each_present_cpu(i)
+ for (i = 0; i < smp_height * smp_width; ++i)
 register_cpu(&cpu_devices[i], i);

 return 0;
--
cgit v1.2.3


From 3edabee2ed22ee4f98f4b4bb38a41059226a8446 Mon Sep 17 00:00:00 2001
From: Chris Metcalf
Date: Wed, 24 Nov 2010 13:57:42 -0500
Subject: arch/tile: fix memchr() not to dereference memory for zero length

This change fixes a bug where memchr() would read the first word of the
source even if the length is zero. Ironically, the code was originally
written with a test to avoid exactly this problem, but to make the code
conform to Linux coding standards with all declarations preceding all
statements, the first load from memory was moved up above that test as
the initial value for a variable.

The change just moves all the variable declarations to the top of the
function, with no initializers, so that the test can also be at the top
of the function.

Signed-off-by: Chris Metcalf
---
 arch/tile/lib/memchr_32.c | 35 +++++++++++++++++++----------------
 1 file changed, 19 insertions(+), 16 deletions(-)

diff --git a/arch/tile/lib/memchr_32.c b/arch/tile/lib/memchr_32.c
index 6235283b4859..cc3d9badf030 100644
--- a/arch/tile/lib/memchr_32.c
+++ b/arch/tile/lib/memchr_32.c
@@ -18,12 +18,24 @@
 void *memchr(const void *s, int c, size_t n)
 {
+ const uint32_t *last_word_ptr;
+ const uint32_t *p;
+ const char *last_byte_ptr;
+ uintptr_t s_int;
+ uint32_t goal, before_mask, v, bits;
+ char *ret;
+
+ if (__builtin_expect(n == 0, 0)) {
+ /* Don't dereference any memory if the array is empty. */
+ return NULL;
+ }
+
 /* Get an aligned pointer. */
- const uintptr_t s_int = (uintptr_t) s;
- const uint32_t *p = (const uint32_t *)(s_int & -4);
+ s_int = (uintptr_t) s;
+ p = (const uint32_t *)(s_int & -4);

 /* Create four copies of the byte for which we are looking. */
- const uint32_t goal = 0x01010101 * (uint8_t) c;
+ goal = 0x01010101 * (uint8_t) c;

 /* Read the first word, but munge it so that bytes before the array
 * will not match goal.
@@ -31,23 +43,14 @@ void *memchr(const void *s, int c, size_t n)
 * Note that this shift count expression works because we know
 * shift counts are taken mod 32.
 */
- const uint32_t before_mask = (1 << (s_int << 3)) - 1;
- uint32_t v = (*p | before_mask) ^ (goal & before_mask);
+ before_mask = (1 << (s_int << 3)) - 1;
+ v = (*p | before_mask) ^ (goal & before_mask);

 /* Compute the address of the last byte. */
- const char *const last_byte_ptr = (const char *)s + n - 1;
+ last_byte_ptr = (const char *)s + n - 1;

 /* Compute the address of the word containing the last byte. */
- const uint32_t *const last_word_ptr =
- (const uint32_t *)((uintptr_t) last_byte_ptr & -4);
-
- uint32_t bits;
- char *ret;
-
- if (__builtin_expect(n == 0, 0)) {
- /* Don't dereference any memory if the array is empty. */
- return NULL;
- }
+ last_word_ptr = (const uint32_t *)((uintptr_t) last_byte_ptr & -4);

 while ((bits = __insn_seqb(v, goal)) == 0) {
 if (__builtin_expect(p == last_word_ptr, 0)) {
--
cgit v1.2.3


From 4448008eb12f4b6bb9993584de8ec1d20b708d6f Mon Sep 17 00:00:00 2001
From: Steven Rostedt
Date: Wed, 24 Nov 2010 11:19:05 -0800
Subject: isdn: icn: Fix stack corruption bug.
From 4448008eb12f4b6bb9993584de8ec1d20b708d6f Mon Sep 17 00:00:00 2001
From: Steven Rostedt
Date: Wed, 24 Nov 2010 11:19:05 -0800
Subject: isdn: icn: Fix stack corruption bug.

Running randconfig with ktest.pl I hit this bug:

[ 16.101158] ICN-ISDN-driver Rev 1.65.6.8 mem=0x000d0000
[ 16.106376] icn: (line0) ICN-2B, port 0x320 added
[ 16.111064] Kernel panic - not syncing: stack-protector: Kernel stack is corrupted in: c1642880
[ 16.111066]
[ 16.121214] Pid: 1, comm: swapper Not tainted 2.6.37-rc2-test-00124-g6656b3f #8
[ 16.128499] Call Trace:
[ 16.130942] [] ? printk+0x1d/0x23
[ 16.135200] [] panic+0x5c/0x162
[ 16.139286] [] ? icn_addcard+0x6d/0xbe
[ 16.143975] [] print_tainted+0x0/0x8c
[ 16.148582] [] ? icn_init+0xd8/0xdf
[ 16.153012] [] icn_init+0xd8/0xdf
[ 16.157271] [] do_one_initcall+0x8c/0x143
[ 16.162222] [] ? icn_init+0x0/0xdf
[ 16.166566] [] kernel_init+0x13f/0x1da
[ 16.171256] [] ? kernel_init+0x0/0x1da
[ 16.175945] [] kernel_thread_helper+0x6/0x10
[ 16.181181] panic occurred, switching back to text console

Looking into it, I found that the stack was corrupted by the assignment
of the Rev #. The variable rev is given 10 bytes, and in this output the
characters that were copied were " 1.65.6.8 $", which is 11 characters
plus the terminating NUL, for a total of 12 bytes, thus corrupting the
stack.

This patch ups the variable size to 20 bytes and changes the strcpy to
strncpy. I also added a check to make sure '$' is found.

Signed-off-by: Steven Rostedt
Signed-off-by: David S. Miller
---
 drivers/isdn/icn/icn.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/isdn/icn/icn.c b/drivers/isdn/icn/icn.c
index 2e847a90bad0..f2b5bab5e6a1 100644
--- a/drivers/isdn/icn/icn.c
+++ b/drivers/isdn/icn/icn.c
@@ -1627,7 +1627,7 @@ __setup("icn=", icn_setup);
 static int __init icn_init(void)
 {
 	char *p;
-	char rev[10];
+	char rev[20];
 
 	memset(&dev, 0, sizeof(icn_dev));
 	dev.memaddr = (membase & 0x0ffc000);
@@ -1637,9 +1637,10 @@ static int __init icn_init(void)
 	spin_lock_init(&dev.devlock);
 
 	if ((p = strchr(revision, ':'))) {
-		strcpy(rev, p + 1);
+		strncpy(rev, p + 1, 20);
 		p = strchr(rev, '$');
-		*p = 0;
+		if (p)
+			*p = 0;
 	} else
 		strcpy(rev, " ??? ");
 	printk(KERN_NOTICE "ICN-ISDN-driver Rev%smem=0x%08lx\n", rev,
-- cgit v1.2.3


From c39508d6f118308355468314ff414644115a07f3 Mon Sep 17 00:00:00 2001
From: "David S. Miller"
Date: Wed, 24 Nov 2010 11:47:22 -0800
Subject: tcp: Make TCP_MAXSEG minimum more correct.

Use TCP_MIN_MSS instead of the constant 64.

Reported-by: Min Zhang
Signed-off-by: David S. Miller
---
 net/ipv4/tcp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 081419969485..f15c36a706ec 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2246,7 +2246,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
 		/* Values greater than interface MTU won't take effect. However
 		 * at the point when this call is done we typically don't yet
 		 * know which interface is going to be used */
-		if (val < 64 || val > MAX_TCP_WINDOW) {
+		if (val < TCP_MIN_MSS || val > MAX_TCP_WINDOW) {
 			err = -EINVAL;
 			break;
 		}
-- cgit v1.2.3


From fa0e846494792e722d817b9d3d625a4ef4896c96 Mon Sep 17 00:00:00 2001
From: Phil Blundell
Date: Wed, 24 Nov 2010 11:49:19 -0800
Subject: econet: disallow NULL remote addr for sendmsg(), fixes CVE-2010-3849

Later parts of econet_sendmsg() rely on saddr != NULL, so return early
with EINVAL if NULL was passed; otherwise an oops may occur.
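The shape of the fix is the standard sendmsg() address validation. A condensed sketch of the pattern follows; the function name is illustrative and the locking done by the real patch is elided:

/* Refuse a missing or too-short address up front instead of
 * oopsing on saddr later in the send path. */
static int ec_check_remote_addr(const struct sockaddr_ec *saddr,
				int msg_namelen)
{
	if (saddr == NULL || msg_namelen < sizeof(struct sockaddr_ec))
		return -EINVAL;
	return 0;
}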
Signed-off-by: Phil Blundell
Signed-off-by: David S. Miller
---
 net/econet/af_econet.c | 26 ++++++++------------------
 1 file changed, 8 insertions(+), 18 deletions(-)

diff --git a/net/econet/af_econet.c b/net/econet/af_econet.c
index f8c1ae4b41f0..e366f1bef91f 100644
--- a/net/econet/af_econet.c
+++ b/net/econet/af_econet.c
@@ -297,23 +297,14 @@ static int econet_sendmsg(struct kiocb *iocb, struct socket *sock,
 
 	mutex_lock(&econet_mutex);
 
-	if (saddr == NULL) {
-		struct econet_sock *eo = ec_sk(sk);
-
-		addr.station = eo->station;
-		addr.net = eo->net;
-		port = eo->port;
-		cb = eo->cb;
-	} else {
-		if (msg->msg_namelen < sizeof(struct sockaddr_ec)) {
-			mutex_unlock(&econet_mutex);
-			return -EINVAL;
-		}
-		addr.station = saddr->addr.station;
-		addr.net = saddr->addr.net;
-		port = saddr->port;
-		cb = saddr->cb;
-	}
+	if (saddr == NULL || msg->msg_namelen < sizeof(struct sockaddr_ec)) {
+		mutex_unlock(&econet_mutex);
+		return -EINVAL;
+	}
+	addr.station = saddr->addr.station;
+	addr.net = saddr->addr.net;
+	port = saddr->port;
+	cb = saddr->cb;
 
 	/* Look for a device with the right network number. */
 	dev = net2dev_map[addr.net];
@@ -351,7 +342,6 @@ static int econet_sendmsg(struct kiocb *iocb, struct socket *sock,
 
 	eb = (struct ec_cb *)&skb->cb;
 
-	/* BUG: saddr may be NULL */
 	eb->cookie = saddr->cookie;
 	eb->sec = *saddr;
 	eb->sent = ec_tx_done;
-- cgit v1.2.3


From 16c41745c7b92a243d0874f534c1655196c64b74 Mon Sep 17 00:00:00 2001
From: Phil Blundell
Date: Wed, 24 Nov 2010 11:49:53 -0800
Subject: econet: fix CVE-2010-3850

Add a missing check for capable(CAP_NET_ADMIN) in the SIOCSIFADDR
operation.

Signed-off-by: Phil Blundell
Signed-off-by: David S. Miller
---
 net/econet/af_econet.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/net/econet/af_econet.c b/net/econet/af_econet.c
index e366f1bef91f..d41ba8e56c10 100644
--- a/net/econet/af_econet.c
+++ b/net/econet/af_econet.c
@@ -661,6 +661,9 @@ static int ec_dev_ioctl(struct socket *sock, unsigned int cmd, void __user *arg)
 	err = 0;
 	switch (cmd) {
 	case SIOCSIFADDR:
+		if (!capable(CAP_NET_ADMIN))
+			return -EPERM;
+
 		edev = dev->ec_ptr;
 		if (edev == NULL) {
 			/* Magic up a new one. */
-- cgit v1.2.3


From a27e13d370415add3487949c60810e36069a23a6 Mon Sep 17 00:00:00 2001
From: Phil Blundell
Date: Wed, 24 Nov 2010 11:51:47 -0800
Subject: econet: fix CVE-2010-3848

Don't declare a variable-sized array of iovecs on the stack, since this
could cause a stack overflow if msg->msg_iovlen is large. Instead,
coalesce the user-supplied data into a new buffer and use a single iovec
for it.
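Condensed, the replacement pattern looks like this. It is a sketch of the patch's approach with the surrounding declarations, error labels, and locking elided:

	/* Bound the request, then linearize the user iovec into one
	 * kernel buffer so only a fixed two-element iovec is needed. */
	if (len > 32768)
		return -E2BIG;

	userbuf = vmalloc(len);
	if (userbuf == NULL)
		return -ENOMEM;

	err = memcpy_fromiovec(userbuf, msg->msg_iov, len);
	if (err) {
		vfree(userbuf);
		return err;
	}

	iov[0].iov_base = (void *)&ah;	/* protocol header */
	iov[0].iov_len = sizeof(struct aunhdr);
	iov[1].iov_base = userbuf;	/* flattened user data */
	iov[1].iov_len = len;

This also removes the old mix of userland and kernel pointers in one iovec, which the deleted XXX comment in the diff below rightly called broken.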
Signed-off-by: Phil Blundell
Signed-off-by: David S. Miller
---
 net/econet/af_econet.c | 62 +++++++++++++++++++++++++-------------------------
 1 file changed, 31 insertions(+), 31 deletions(-)

diff --git a/net/econet/af_econet.c b/net/econet/af_econet.c
index d41ba8e56c10..13992e1d2726 100644
--- a/net/econet/af_econet.c
+++ b/net/econet/af_econet.c
@@ -31,6 +31,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -276,12 +277,12 @@ static int econet_sendmsg(struct kiocb *iocb, struct socket *sock,
 #endif
 #ifdef CONFIG_ECONET_AUNUDP
 	struct msghdr udpmsg;
-	struct iovec iov[msg->msg_iovlen+1];
+	struct iovec iov[2];
 	struct aunhdr ah;
 	struct sockaddr_in udpdest;
 	__kernel_size_t size;
-	int i;
 	mm_segment_t oldfs;
+	char *userbuf;
 #endif
 
 	/*
@@ -319,17 +320,17 @@ static int econet_sendmsg(struct kiocb *iocb, struct socket *sock,
 		}
 	}
 
-	if (len + 15 > dev->mtu) {
-		mutex_unlock(&econet_mutex);
-		return -EMSGSIZE;
-	}
-
 	if (dev->type == ARPHRD_ECONET) {
 		/* Real hardware Econet. We're not worthy etc. */
 #ifdef CONFIG_ECONET_NATIVE
 		unsigned short proto = 0;
 		int res;
 
+		if (len + 15 > dev->mtu) {
+			mutex_unlock(&econet_mutex);
+			return -EMSGSIZE;
+		}
+
 		dev_hold(dev);
 
 		skb = sock_alloc_send_skb(sk, len+LL_ALLOCATED_SPACE(dev),
@@ -405,6 +406,11 @@ static int econet_sendmsg(struct kiocb *iocb, struct socket *sock,
 		return -ENETDOWN;	/* No socket - can't send */
 	}
 
+	if (len > 32768) {
+		err = -E2BIG;
+		goto error;
+	}
+
 	/* Make up a UDP datagram and hand it off to some higher intellect. */
 
 	memset(&udpdest, 0, sizeof(udpdest));
@@ -436,36 +442,26 @@ static int econet_sendmsg(struct kiocb *iocb, struct socket *sock,
 
 	/* tack our header on the front of the iovec */
 	size = sizeof(struct aunhdr);
-	/*
-	 * XXX: that is b0rken. We can't mix userland and kernel pointers
-	 * in iovec, since on a lot of platforms copy_from_user() will
-	 * *not* work with the kernel and userland ones at the same time,
-	 * regardless of what we do with set_fs(). And we are talking about
-	 * econet-over-ethernet here, so "it's only ARM anyway" doesn't
-	 * apply. Any suggestions on fixing that code? -- AV
-	 */
 	iov[0].iov_base = (void *)&ah;
 	iov[0].iov_len = size;
-	for (i = 0; i < msg->msg_iovlen; i++) {
-		void __user *base = msg->msg_iov[i].iov_base;
-		size_t iov_len = msg->msg_iov[i].iov_len;
-		/* Check it now since we switch to KERNEL_DS later. */
-		if (!access_ok(VERIFY_READ, base, iov_len)) {
-			mutex_unlock(&econet_mutex);
-			return -EFAULT;
-		}
-		iov[i+1].iov_base = base;
-		iov[i+1].iov_len = iov_len;
-		size += iov_len;
+
+	userbuf = vmalloc(len);
+	if (userbuf == NULL) {
+		err = -ENOMEM;
+		goto error;
 	}
+
+	iov[1].iov_base = userbuf;
+	iov[1].iov_len = len;
+	err = memcpy_fromiovec(userbuf, msg->msg_iov, len);
+	if (err)
+		goto error_free_buf;
 
 	/* Get a skbuff (no data, just holds our cb information) */
 	if ((skb = sock_alloc_send_skb(sk, 0,
 				       msg->msg_flags & MSG_DONTWAIT,
-				       &err)) == NULL) {
-		mutex_unlock(&econet_mutex);
-		return err;
-	}
+				       &err)) == NULL)
+		goto error_free_buf;
 
 	eb = (struct ec_cb *)&skb->cb;
 
@@ -481,7 +477,7 @@ static int econet_sendmsg(struct kiocb *iocb, struct socket *sock,
 	udpmsg.msg_name = (void *)&udpdest;
 	udpmsg.msg_namelen = sizeof(udpdest);
 	udpmsg.msg_iov = &iov[0];
-	udpmsg.msg_iovlen = msg->msg_iovlen + 1;
+	udpmsg.msg_iovlen = 2;
 	udpmsg.msg_control = NULL;
 	udpmsg.msg_controllen = 0;
 	udpmsg.msg_flags=0;
@@ -489,9 +485,13 @@ static int econet_sendmsg(struct kiocb *iocb, struct socket *sock,
 	oldfs = get_fs(); set_fs(KERNEL_DS);	/* More privs :-) */
 	err = sock_sendmsg(udpsock, &udpmsg, size);
 	set_fs(oldfs);
+
+error_free_buf:
+	vfree(userbuf);
 #else
 	err = -EPROTOTYPE;
 #endif
+	error:
 	mutex_unlock(&econet_mutex);
 
 	return err;
-- cgit v1.2.3


From f910043ce00791c06afc3789278447c4e88670ea Mon Sep 17 00:00:00 2001
From: Kevin Hilman
Date: Wed, 24 Nov 2010 11:09:03 -0800
Subject: OMAP: UART: don't resume UARTs that are not enabled.

Add an additional check to omap_uart_resume_idle() so that only enabled
(specifically, idle-enabled) UARTs are allowed to resume. This matches
the existing check in the prepare-idle path.

Without this patch, the system will hang if a board is configured to
register only some UARTs instead of all of them and PM is enabled.

Cc: Govindraj R.
Signed-off-by: Kevin Hilman
[tony@atomide.com: updated description]
Signed-off-by: Tony Lindgren
---
 arch/arm/mach-omap2/serial.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm/mach-omap2/serial.c b/arch/arm/mach-omap2/serial.c
index becf0e38ef7e..bc934dbe3bf0 100644
--- a/arch/arm/mach-omap2/serial.c
+++ b/arch/arm/mach-omap2/serial.c
@@ -406,7 +406,7 @@ void omap_uart_resume_idle(int num)
 	struct omap_uart_state *uart;
 
 	list_for_each_entry(uart, &uart_list, node) {
-		if (num == uart->num) {
+		if (num == uart->num && uart->can_sleep) {
 			omap_uart_enable_clocks(uart);
 
 			/* Check for IO pad wakeup */
-- cgit v1.2.3


From 0d8e2d0dad98a693bad88aea6876ac8b94ad95c6 Mon Sep 17 00:00:00 2001
From: Paul Walmsley
Date: Wed, 24 Nov 2010 16:49:05 -0700
Subject: OMAP2+: PM/serial: hold console semaphore while OMAP UARTs are disabled

The console semaphore must be held while the OMAP UART devices are
disabled, lest a console write cause an ARM abort (and a kernel crash)
when the underlying console device is inaccessible. These crashes only
occur when the console is on one of the OMAP internal serial ports.

While this problem has been latent in the PM idle loop for some time,
the crash was not triggerable with an unmodified kernel until commit
6f251e9db1093c187addc309b5f2f7fe3efd2995 ("OMAP: UART: omap_device
conversions, remove implicit 8250 assumptions"). After this patch, a
console write often occurs after the console UART has been disabled in
the idle loop, crashing the system. Several users have encountered this
bug:

http://www.mail-archive.com/linux-omap@vger.kernel.org/msg38396.html

http://www.mail-archive.com/linux-omap@vger.kernel.org/msg36602.html

The same commit also introduced new code that disables the UARTs during
init, in omap_serial_init_port(). The kernel will also crash in this
code when earlyconsole and extra debugging are enabled:

http://www.mail-archive.com/linux-omap@vger.kernel.org/msg36411.html

The minimal fix for the -rc series is to hold the console semaphore
while the OMAP UARTs are disabled. This is a somewhat overbroad fix,
since the console may not be located on an OMAP UART, as is the case
with the GPMC UART on Zoom3. While it is technically possible to
determine which devices the console or earlyconsole is actually running
on, it is not a trivial problem to solve, and the code to do so is not
really appropriate for the -rc series.

The right long-term fix is to ensure that no code outside of the OMAP
serial driver can disable an OMAP UART. As I understand it, code to
implement this is under development by TI.

This patch is a collaboration between Paul Walmsley and Tony Lindgren.
Thanks to Ming Lei and Pramod for their feedback on earlier versions of
this patch.
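The resulting idle-path pattern is small; here is a condensed sketch based on the pm24xx change below, with the surrounding retention code elided:

	/* Don't disable the UART clocks while a printk could be in
	 * flight: bail out of the low-power path entirely if the
	 * console semaphore cannot be taken without sleeping. */
	if (try_acquire_console_sem())
		goto no_sleep;

	omap_uart_prepare_idle(0);
	/* ... enter retention and wake back up ... */
	omap_uart_resume_idle(0);

	release_console_sem();

Using the try variant is the key design choice: the idle loop must never sleep waiting for the semaphore, so a held console simply skips the retention attempt.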
Signed-off-by: Paul Walmsley
Signed-off-by: Tony Lindgren
Acked-by: Kevin Hilman
Cc: Ming Lei
Cc: Pramod
Cc: Thomas Petazzoni
Cc: Jean Pihet
Cc: Govindraj.R
---
 arch/arm/mach-omap2/pm24xx.c |  7 +++++++
 arch/arm/mach-omap2/pm34xx.c | 10 ++++++++++
 arch/arm/mach-omap2/serial.c |  5 +++++
 3 files changed, 22 insertions(+)

diff --git a/arch/arm/mach-omap2/pm24xx.c b/arch/arm/mach-omap2/pm24xx.c
index a40457d81927..c85923e56b85 100644
--- a/arch/arm/mach-omap2/pm24xx.c
+++ b/arch/arm/mach-omap2/pm24xx.c
@@ -30,6 +30,7 @@
 #include
 #include
 #include
+#include
 
 #include
 #include
@@ -118,6 +119,10 @@ static void omap2_enter_full_retention(void)
 	if (omap_irq_pending())
 		goto no_sleep;
 
+	/* Block console output in case it is on one of the OMAP UARTs */
+	if (try_acquire_console_sem())
+		goto no_sleep;
+
 	omap_uart_prepare_idle(0);
 	omap_uart_prepare_idle(1);
 	omap_uart_prepare_idle(2);
@@ -131,6 +136,8 @@ static void omap2_enter_full_retention(void)
 	omap_uart_resume_idle(1);
 	omap_uart_resume_idle(0);
 
+	release_console_sem();
+
 no_sleep:
 	if (omap2_pm_debug) {
 		unsigned long long tmp;
diff --git a/arch/arm/mach-omap2/pm34xx.c b/arch/arm/mach-omap2/pm34xx.c
index 75c0cd13ad8e..0ec8a04b7473 100644
--- a/arch/arm/mach-omap2/pm34xx.c
+++ b/arch/arm/mach-omap2/pm34xx.c
@@ -28,6 +28,7 @@
 #include
 #include
 #include
+#include
 
 #include
 #include
@@ -385,6 +386,12 @@ void omap_sram_idle(void)
 		omap3_enable_io_chain();
 	}
 
+	/* Block console output in case it is on one of the OMAP UARTs */
+	if (per_next_state < PWRDM_POWER_ON ||
+	    core_next_state < PWRDM_POWER_ON)
+		if (try_acquire_console_sem())
+			goto console_still_active;
+
 	/* PER */
 	if (per_next_state < PWRDM_POWER_ON) {
 		omap_uart_prepare_idle(2);
@@ -463,6 +470,9 @@ void omap_sram_idle(void)
 		omap_uart_resume_idle(3);
 	}
 
+	release_console_sem();
+
+console_still_active:
 	/* Disable IO-PAD and IO-CHAIN wakeup */
 	if (omap3_has_io_wakeup() &&
 	    (per_next_state < PWRDM_POWER_ON ||
diff --git a/arch/arm/mach-omap2/serial.c b/arch/arm/mach-omap2/serial.c
index bc934dbe3bf0..d17960a1be25 100644
--- a/arch/arm/mach-omap2/serial.c
+++ b/arch/arm/mach-omap2/serial.c
@@ -27,6 +27,7 @@
 #include
 #include
 #include
+#include
 
 #ifdef CONFIG_SERIAL_OMAP
 #include
@@ -807,6 +808,8 @@ void __init omap_serial_init_port(int port)
 
 	oh->dev_attr = uart;
 
+	acquire_console_sem(); /* in case the earlycon is on the UART */
+
 	/*
 	 * Because of early UART probing, UART did not get idled
 	 * on init. Now that omap_device is ready, ensure full idle
@@ -831,6 +834,8 @@ void __init omap_serial_init_port(int port)
 	omap_uart_block_sleep(uart);
 	uart->timeout = DEFAULT_TIMEOUT;
 
+	release_console_sem();
+
 	if ((cpu_is_omap34xx() && uart->padconf) ||
 	    (uart->wk_en && uart->wk_mask)) {
 		device_init_wakeup(&od->pdev.dev, true);
-- cgit v1.2.3


From 6fc50eafc49262e376455a480f9d793817fe74e2 Mon Sep 17 00:00:00 2001
From: Vasily Khoruzhick
Date: Tue, 16 Nov 2010 18:11:59 +0900
Subject: ARM: S3C2410: Adapt h1940-bluetooth to gpiolib changes

Replace s3c_gpio_cfgpull with s3c_gpio_setpull.
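For context, the corrected idiom pairs a pin-function selection with an explicit pull setting; a two-line sketch of the pattern used throughout the diff below:

	/* Select the pin function, then set the pull state with the
	 * gpiolib-era API (s3c_gpio_setpull, not the old *_cfgpull). */
	s3c_gpio_cfgpin(S3C2410_GPH(0), S3C2410_GPH0_nCTS0);
	s3c_gpio_setpull(S3C2410_GPH(0), S3C_GPIO_PULL_NONE);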
Signed-off-by: Vasily Khoruzhick
Signed-off-by: Kukjin Kim
---
 arch/arm/mach-s3c2410/h1940-bluetooth.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/arm/mach-s3c2410/h1940-bluetooth.c b/arch/arm/mach-s3c2410/h1940-bluetooth.c
index 8aa2f1902a94..6b86a722a7db 100644
--- a/arch/arm/mach-s3c2410/h1940-bluetooth.c
+++ b/arch/arm/mach-s3c2410/h1940-bluetooth.c
@@ -77,13 +77,13 @@ static int __devinit h1940bt_probe(struct platform_device *pdev)
 
 	/* Configures BT serial port GPIOs */
 	s3c_gpio_cfgpin(S3C2410_GPH(0), S3C2410_GPH0_nCTS0);
-	s3c_gpio_cfgpull(S3C2410_GPH(0), S3C_GPIO_PULL_NONE);
+	s3c_gpio_setpull(S3C2410_GPH(0), S3C_GPIO_PULL_NONE);
 	s3c_gpio_cfgpin(S3C2410_GPH(1), S3C2410_GPIO_OUTPUT);
-	s3c_gpio_cfgpull(S3C2410_GPH(1), S3C_GPIO_PULL_NONE);
+	s3c_gpio_setpull(S3C2410_GPH(1), S3C_GPIO_PULL_NONE);
 	s3c_gpio_cfgpin(S3C2410_GPH(2), S3C2410_GPH2_TXD0);
-	s3c_gpio_cfgpull(S3C2410_GPH(2), S3C_GPIO_PULL_NONE);
+	s3c_gpio_setpull(S3C2410_GPH(2), S3C_GPIO_PULL_NONE);
 	s3c_gpio_cfgpin(S3C2410_GPH(3), S3C2410_GPH3_RXD0);
-	s3c_gpio_cfgpull(S3C2410_GPH(3), S3C_GPIO_PULL_NONE);
+	s3c_gpio_setpull(S3C2410_GPH(3), S3C_GPIO_PULL_NONE);
 
 	rfk = rfkill_alloc(DRV_NAME, &pdev->dev, RFKILL_TYPE_BLUETOOTH,
-- cgit v1.2.3


From cce58ab380727169ef72b76481441f851e5850b0 Mon Sep 17 00:00:00 2001
From: Kukjin Kim
Date: Mon, 15 Nov 2010 11:11:22 +0900
Subject: ARM: S3C24XX: Fix wrong s3c_gpio_cfgpull

This patch fixes the wrong s3c_gpio_cfgpull calls by replacing them
with s3c_gpio_setpull.

Cc: Ben Dooks
Signed-off-by: Kukjin Kim
---
 arch/arm/plat-s3c24xx/spi-bus0-gpe11_12_13.c | 6 +++---
 arch/arm/plat-s3c24xx/spi-bus1-gpd8_9_10.c   | 6 +++---
 arch/arm/plat-s3c24xx/spi-bus1-gpg5_6_7.c    | 6 +++---
 3 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/arch/arm/plat-s3c24xx/spi-bus0-gpe11_12_13.c b/arch/arm/plat-s3c24xx/spi-bus0-gpe11_12_13.c
index 9793544a6ace..704175b0573f 100644
--- a/arch/arm/plat-s3c24xx/spi-bus0-gpe11_12_13.c
+++ b/arch/arm/plat-s3c24xx/spi-bus0-gpe11_12_13.c
@@ -29,8 +29,8 @@ void s3c24xx_spi_gpiocfg_bus0_gpe11_12_13(struct s3c2410_spi_info *spi,
 	} else {
 		s3c_gpio_cfgpin(S3C2410_GPE(13), S3C2410_GPIO_INPUT);
 		s3c_gpio_cfgpin(S3C2410_GPE(11), S3C2410_GPIO_INPUT);
-		s3c_gpio_cfgpull(S3C2410_GPE(11), S3C_GPIO_PULL_NONE);
-		s3c_gpio_cfgpull(S3C2410_GPE(12), S3C_GPIO_PULL_NONE);
-		s3c_gpio_cfgpull(S3C2410_GPE(13), S3C_GPIO_PULL_NONE);
+		s3c_gpio_setpull(S3C2410_GPE(11), S3C_GPIO_PULL_NONE);
+		s3c_gpio_setpull(S3C2410_GPE(12), S3C_GPIO_PULL_NONE);
+		s3c_gpio_setpull(S3C2410_GPE(13), S3C_GPIO_PULL_NONE);
 	}
 }
diff --git a/arch/arm/plat-s3c24xx/spi-bus1-gpd8_9_10.c b/arch/arm/plat-s3c24xx/spi-bus1-gpd8_9_10.c
index db9e9e477ec1..72457afd6255 100644
--- a/arch/arm/plat-s3c24xx/spi-bus1-gpd8_9_10.c
+++ b/arch/arm/plat-s3c24xx/spi-bus1-gpd8_9_10.c
@@ -31,8 +31,8 @@ void s3c24xx_spi_gpiocfg_bus1_gpd8_9_10(struct s3c2410_spi_info *spi,
 	} else {
 		s3c_gpio_cfgpin(S3C2410_GPD(8), S3C2410_GPIO_INPUT);
 		s3c_gpio_cfgpin(S3C2410_GPD(9), S3C2410_GPIO_INPUT);
-		s3c_gpio_cfgpull(S3C2410_GPD(10), S3C_GPIO_PULL_NONE);
-		s3c_gpio_cfgpull(S3C2410_GPD(9), S3C_GPIO_PULL_NONE);
-		s3c_gpio_cfgpull(S3C2410_GPD(8), S3C_GPIO_PULL_NONE);
+		s3c_gpio_setpull(S3C2410_GPD(10), S3C_GPIO_PULL_NONE);
+		s3c_gpio_setpull(S3C2410_GPD(9), S3C_GPIO_PULL_NONE);
+		s3c_gpio_setpull(S3C2410_GPD(8), S3C_GPIO_PULL_NONE);
 	}
 }
diff --git a/arch/arm/plat-s3c24xx/spi-bus1-gpg5_6_7.c b/arch/arm/plat-s3c24xx/spi-bus1-gpg5_6_7.c
index 8ea663a438bb..c3972b645d13 100644
--- a/arch/arm/plat-s3c24xx/spi-bus1-gpg5_6_7.c
+++ b/arch/arm/plat-s3c24xx/spi-bus1-gpg5_6_7.c
@@ -29,8 +29,8 @@ void s3c24xx_spi_gpiocfg_bus1_gpg5_6_7(struct s3c2410_spi_info *spi,
 	} else {
 		s3c_gpio_cfgpin(S3C2410_GPG(7), S3C2410_GPIO_INPUT);
 		s3c_gpio_cfgpin(S3C2410_GPG(5), S3C2410_GPIO_INPUT);
-		s3c_gpio_cfgpull(S3C2410_GPG(5), S3C_GPIO_PULL_NONE);
-		s3c_gpio_cfgpull(S3C2410_GPG(6), S3C_GPIO_PULL_NONE);
-		s3c_gpio_cfgpull(S3C2410_GPG(7), S3C_GPIO_PULL_NONE);
+		s3c_gpio_setpull(S3C2410_GPG(5), S3C_GPIO_PULL_NONE);
+		s3c_gpio_setpull(S3C2410_GPG(6), S3C_GPIO_PULL_NONE);
+		s3c_gpio_setpull(S3C2410_GPG(7), S3C_GPIO_PULL_NONE);
 	}
 }
-- cgit v1.2.3


From 8b8c87dee47ae7e41af95d03ca56b3a4633466a6 Mon Sep 17 00:00:00 2001
From: Darius Augulis
Date: Tue, 16 Nov 2010 18:08:50 +0900
Subject: ARM: S3C64XX: fix uart clock setup for mini6410/real6410

Don't rewrite the clock configuration in UCON that the bootloader has
preconfigured. There is no need to set bit 10 in UCON, because a value
of 2'b00 in bits [11:10] also selects PCLK as the source clock. If the
bit is set, the console does not work when the bootloader has
preconfigured [11:10] with 2'b00; if it is left untouched, the console
works with any bootloader configuration (2'bxx).

More information about the clock setup in UCON is available in the
"S3C6410X RISC Microprocessor User's Manual, Revision 1.20", p. 31-13
(Chapter 31.6.2, UART CONTROL REGISTER).

Signed-off-by: Darius Augulis
Signed-off-by: Kukjin Kim
---
 arch/arm/mach-s3c64xx/mach-mini6410.c | 2 +-
 arch/arm/mach-s3c64xx/mach-real6410.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/arm/mach-s3c64xx/mach-mini6410.c b/arch/arm/mach-s3c64xx/mach-mini6410.c
index 249c62956471..89f35e02e883 100644
--- a/arch/arm/mach-s3c64xx/mach-mini6410.c
+++ b/arch/arm/mach-s3c64xx/mach-mini6410.c
@@ -45,7 +45,7 @@
 #include