Linux-2.6.12-rc2v2.6.12-rc2

Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip!
author: Linus Torvalds <torvalds@ppc970.osdl.org> 2005-04-16 15:20:36 -0700
committer: Linus Torvalds <torvalds@ppc970.osdl.org> 2005-04-16 15:20:36 -0700
commit: 1da177e4c3f41524e886b7f1b8a0c1fc7321cac2 (patch)
tree: 0bba044c4ce775e45a88a51686b5d9f90697ea9d /arch/ppc/lib
download: linux-1da177e4c3f41524e886b7f1b8a0c1fc7321cac2.tar.gz
linux-1da177e4c3f41524e886b7f1b8a0c1fc7321cac2.tar.bz2
linux-1da177e4c3f41524e886b7f1b8a0c1fc7321cac2.zip
8 files changed, 1960 insertions, 0 deletions
diff --git a/arch/ppc/lib/Makefile b/arch/ppc/lib/Makefile
new file mode 100644
index 000000000000..1c380e67d435
--- /dev/null
+++ b/arch/ppc/lib/Makefile
@@ -0,0 +1,9 @@
+#
+# Makefile for ppc-specific library files..
+#
+
+obj-y			:= checksum.o string.o strcase.o dec_and_lock.o div64.o
+
+obj-$(CONFIG_SMP)	+= locks.o
+obj-$(CONFIG_8xx)	+= rheap.o
+obj-$(CONFIG_CPM2)	+= rheap.o
diff --git a/arch/ppc/lib/checksum.S b/arch/ppc/lib/checksum.S
new file mode 100644
index 000000000000..7874e8a80455
--- /dev/null
+++ b/arch/ppc/lib/checksum.S
@@ -0,0 +1,225 @@
+/*
+ * This file contains assembly-language implementations
+ * of IP-style 1's complement checksum routines.
+ *	
+ *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version
+ *  2 of the License, or (at your option) any later version.
+ *
+ * Severely hacked about by Paul Mackerras (paulus@cs.anu.edu.au).
+ */
+
+#include <linux/sys.h>
+#include <asm/processor.h>
+#include <asm/errno.h>
+#include <asm/ppc_asm.h>
+
+	.text
+
+/*
+ * ip_fast_csum(buf, len) -- Optimized for IP header
+ * len is in words and is always >= 5.
+ */
+_GLOBAL(ip_fast_csum)
+	lwz	r0,0(r3)
+	lwzu	r5,4(r3)
+	addic.	r4,r4,-2
+	addc	r0,r0,r5
+	mtctr	r4
+	blelr-
+1:	lwzu	r4,4(r3)
+	adde	r0,r0,r4
+	bdnz	1b
+	addze	r0,r0		/* add in final carry */
+	rlwinm	r3,r0,16,0,31	/* fold two halves together */
+	add	r3,r0,r3
+	not	r3,r3
+	srwi	r3,r3,16
+	blr
+
+/*
+ * Compute checksum of TCP or UDP pseudo-header:
+ *   csum_tcpudp_magic(saddr, daddr, len, proto, sum)
+ */	
+_GLOBAL(csum_tcpudp_magic)
+	rlwimi	r5,r6,16,0,15	/* put proto in upper half of len */
+	addc	r0,r3,r4	/* add 4 32-bit words together */
+	adde	r0,r0,r5
+	adde	r0,r0,r7
+	addze	r0,r0		/* add in final carry */
+	rlwinm	r3,r0,16,0,31	/* fold two halves together */
+	add	r3,r0,r3
+	not	r3,r3
+	srwi	r3,r3,16
+	blr
+
+/*
+ * computes the checksum of a memory block at buff, length len,
+ * and adds in "sum" (32-bit)
+ *
+ * csum_partial(buff, len, sum)
+ */
+_GLOBAL(csum_partial)
+	addic	r0,r5,0
+	subi	r3,r3,4
+	srwi.	r6,r4,2
+	beq	3f		/* if we're doing < 4 bytes */
+	andi.	r5,r3,2		/* Align buffer to longword boundary */
+	beq+	1f
+	lhz	r5,4(r3)	/* do 2 bytes to get aligned */
+	addi	r3,r3,2
+	subi	r4,r4,2
+	addc	r0,r0,r5
+	srwi.	r6,r4,2		/* # words to do */
+	beq	3f
+1:	mtctr	r6
+2:	lwzu	r5,4(r3)	/* the bdnz has zero overhead, so it should */
+	adde	r0,r0,r5	/* be unnecessary to unroll this loop */
+	bdnz	2b
+	andi.	r4,r4,3
+3:	cmpwi	0,r4,2
+	blt+	4f
+	lhz	r5,4(r3)
+	addi	r3,r3,2
+	subi	r4,r4,2
+	adde	r0,r0,r5
+4:	cmpwi	0,r4,1
+	bne+	5f
+	lbz	r5,4(r3)
+	slwi	r5,r5,8		/* Upper byte of word */
+	adde	r0,r0,r5
+5:	addze	r3,r0		/* add in final carry */
+	blr
+
+/*
+ * Computes the checksum of a memory block at src, length len,
+ * and adds in "sum" (32-bit), while copying the block to dst.
+ * If an access exception occurs on src or dst, it stores -EFAULT
+ * to *src_err or *dst_err respectively, and (for an error on
+ * src) zeroes the rest of dst.
+ *
+ * csum_partial_copy_generic(src, dst, len, sum, src_err, dst_err)
+ */
+_GLOBAL(csum_partial_copy_generic)
+	addic	r0,r6,0
+	subi	r3,r3,4
+	subi	r4,r4,4
+	srwi.	r6,r5,2
+	beq	3f		/* if we're doing < 4 bytes */
+	andi.	r9,r4,2		/* Align dst to longword boundary */
+	beq+	1f
+81:	lhz	r6,4(r3)	/* do 2 bytes to get aligned */
+	addi	r3,r3,2
+	subi	r5,r5,2
+91:	sth	r6,4(r4)
+	addi	r4,r4,2
+	addc	r0,r0,r6
+	srwi.	r6,r5,2		/* # words to do */
+	beq	3f
+1:	srwi.	r6,r5,4		/* # groups of 4 words to do */
+	beq	10f
+	mtctr	r6
+71:	lwz	r6,4(r3)
+72:	lwz	r9,8(r3)
+73:	lwz	r10,12(r3)
+74:	lwzu	r11,16(r3)
+	adde	r0,r0,r6
+75:	stw	r6,4(r4)
+	adde	r0,r0,r9
+76:	stw	r9,8(r4)
+	adde	r0,r0,r10
+77:	stw	r10,12(r4)
+	adde	r0,r0,r11
+78:	stwu	r11,16(r4)
+	bdnz	71b
+10:	rlwinm.	r6,r5,30,30,31	/* # words left to do */
+	beq	13f
+	mtctr	r6
+82:	lwzu	r9,4(r3)
+92:	stwu	r9,4(r4)
+	adde	r0,r0,r9
+	bdnz	82b
+13:	andi.	r5,r5,3
+3:	cmpwi	0,r5,2
+	blt+	4f
+83:	lhz	r6,4(r3)
+	addi	r3,r3,2
+	subi	r5,r5,2
+93:	sth	r6,4(r4)
+	addi	r4,r4,2
+	adde	r0,r0,r6
+4:	cmpwi	0,r5,1
+	bne+	5f
+84:	lbz	r6,4(r3)
+94:	stb	r6,4(r4)
+	slwi	r6,r6,8		/* Upper byte of word */
+	adde	r0,r0,r6
+5:	addze	r3,r0		/* add in final carry */
+	blr
+
+/* These shouldn't go in the fixup section, since that would
+   cause the ex_table addresses to get out of order. */
+
+src_error_4:
+	mfctr	r6		/* update # bytes remaining from ctr */
+	rlwimi	r5,r6,4,0,27
+	b	79f
+src_error_1:
+	li	r6,0
+	subi	r5,r5,2
+95:	sth	r6,4(r4)
+	addi	r4,r4,2
+79:	srwi.	r6,r5,2
+	beq	3f
+	mtctr	r6
+src_error_2:
+	li	r6,0
+96:	stwu	r6,4(r4)
+	bdnz	96b
+3:	andi.	r5,r5,3
+	beq	src_error
+src_error_3:
+	li	r6,0
+	mtctr	r5
+	addi	r4,r4,3
+97:	stbu	r6,1(r4)
+	bdnz	97b
+src_error:
+	cmpwi	0,r7,0
+	beq	1f
+	li	r6,-EFAULT
+	stw	r6,0(r7)
+1:	addze	r3,r0
+	blr
+
+dst_error:
+	cmpwi	0,r8,0
+	beq	1f
+	li	r6,-EFAULT
+	stw	r6,0(r8)
+1:	addze	r3,r0
+	blr
+
+.section __ex_table,"a"
+	.long	81b,src_error_1
+	.long	91b,dst_error
+	.long	71b,src_error_4
+	.long	72b,src_error_4
+	.long	73b,src_error_4
+	.long	74b,src_error_4
+	.long	75b,dst_error
+	.long	76b,dst_error
+	.long	77b,dst_error
+	.long	78b,dst_error
+	.long	82b,src_error_2
+	.long	92b,dst_error
+	.long	83b,src_error_3
+	.long	93b,dst_error
+	.long	84b,src_error_3
+	.long	94b,dst_error
+	.long	95b,dst_error
+	.long	96b,dst_error
+	.long	97b,dst_error
diff --git a/arch/ppc/lib/dec_and_lock.c b/arch/ppc/lib/dec_and_lock.c
new file mode 100644
index 000000000000..4ee888070d91
--- /dev/null
+++ b/arch/ppc/lib/dec_and_lock.c
@@ -0,0 +1,46 @@
+#include <linux/module.h>
+#include <linux/spinlock.h>
+#include <asm/atomic.h>
+#include <asm/system.h>
+
+/*
+ * This is an implementation of the notion of "decrement a
+ * reference count, and return locked if it decremented to zero".
+ *
+ * This implementation can be used on any architecture that
+ * has a cmpxchg, and where atomic->value is an int holding
+ * the value of the atomic (i.e. the high bits aren't used
+ * for a lock or anything like that).
+ *
+ * N.B. ATOMIC_DEC_AND_LOCK gets defined in include/linux/spinlock.h
+ * if spinlocks are empty and thus atomic_dec_and_lock is defined
+ * to be atomic_dec_and_test - in that case we don't need it
+ * defined here as well.
+ */
+
+#ifndef ATOMIC_DEC_AND_LOCK
+int _atomic_dec_and_lock(atomic_t *atomic, spinlock_t *lock)
+{
+	int counter;
+	int newcount;
+
+	for (;;) {
+		counter = atomic_read(atomic);
+		newcount = counter - 1;
+		if (!newcount)
+			break;		/* do it the slow way */
+
+		newcount = cmpxchg(&atomic->counter, counter, newcount);
+		if (newcount == counter)
+			return 0;
+	}
+
+	spin_lock(lock);
+	if (atomic_dec_and_test(atomic))
+		return 1;
+	spin_unlock(lock);
+	return 0;
+}
+
+EXPORT_SYMBOL(_atomic_dec_and_lock);
+#endif /* ATOMIC_DEC_AND_LOCK */
diff --git a/arch/ppc/lib/div64.S b/arch/ppc/lib/div64.S
new file mode 100644
index 000000000000..3527569e9926
--- /dev/null
+++ b/arch/ppc/lib/div64.S
@@ -0,0 +1,58 @@
+/*
+ * Divide a 64-bit unsigned number by a 32-bit unsigned number.
+ * This routine assumes that the top 32 bits of the dividend are
+ * non-zero to start with.
+ * On entry, r3 points to the dividend, which get overwritten with
+ * the 64-bit quotient, and r4 contains the divisor.
+ * On exit, r3 contains the remainder.
+ *
+ * Copyright (C) 2002 Paul Mackerras, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <asm/ppc_asm.h>
+#include <asm/processor.h>
+
+_GLOBAL(__div64_32)
+	lwz	r5,0(r3)	# get the dividend into r5/r6
+	lwz	r6,4(r3)
+	cmplw	r5,r4
+	li	r7,0
+	li	r8,0
+	blt	1f
+	divwu	r7,r5,r4	# if dividend.hi >= divisor,
+	mullw	r0,r7,r4	# quotient.hi = dividend.hi / divisor
+	subf.	r5,r0,r5	# dividend.hi %= divisor
+	beq	3f
+1:	mr	r11,r5		# here dividend.hi != 0
+	andis.	r0,r5,0xc000
+	bne	2f
+	cntlzw	r0,r5		# we are shifting the dividend right
+	li	r10,-1		# to make it < 2^32, and shifting
+	srw	r10,r10,r0	# the divisor right the same amount,
+	add	r9,r4,r10	# rounding up (so the estimate cannot
+	andc	r11,r6,r10	# ever be too large, only too small)
+	andc	r9,r9,r10
+	or	r11,r5,r11
+	rotlw	r9,r9,r0
+	rotlw	r11,r11,r0
+	divwu	r11,r11,r9	# then we divide the shifted quantities
+2:	mullw	r10,r11,r4	# to get an estimate of the quotient,
+	mulhwu	r9,r11,r4	# multiply the estimate by the divisor,
+	subfc	r6,r10,r6	# take the product from the divisor,
+	add	r8,r8,r11	# and add the estimate to the accumulated
+	subfe.	r5,r9,r5	# quotient
+	bne	1b
+3:	cmplw	r6,r4
+	blt	4f
+	divwu	r0,r6,r4	# perform the remaining 32-bit division
+	mullw	r10,r0,r4	# and get the remainder
+	add	r8,r8,r0
+	subf	r6,r10,r6
+4:	stw	r7,0(r3)	# return the quotient in *r3
+	stw	r8,4(r3)
+	mr	r3,r6		# return the remainder in r3
+	blr
diff --git a/arch/ppc/lib/locks.c b/arch/ppc/lib/locks.c
new file mode 100644
index 000000000000..694163d696d8
--- /dev/null
+++ b/arch/ppc/lib/locks.c
@@ -0,0 +1,190 @@
+/*
+ * Locks for smp ppc
+ *
+ * Written by Cort Dougan (cort@cs.nmt.edu)
+ */
+
+#include <linux/config.h>
+#include <linux/sched.h>
+#include <linux/spinlock.h>
+#include <linux/module.h>
+#include <asm/ppc_asm.h>
+#include <asm/smp.h>
+
+#ifdef CONFIG_DEBUG_SPINLOCK
+
+#undef INIT_STUCK
+#define INIT_STUCK 200000000 /*0xffffffff*/
+
+/*
+ * Try to acquire a spinlock.
+ * Only does the stwcx. if the load returned 0 - the Programming
+ * Environments Manual suggests not doing unnecessary stcwx.'s
+ * since they may inhibit forward progress by other CPUs in getting
+ * a lock.
+ */
+static inline unsigned long __spin_trylock(volatile unsigned long *lock)
+{
+	unsigned long ret;
+
+	__asm__ __volatile__ ("\n\
+1:	lwarx	%0,0,%1\n\
+	cmpwi	0,%0,0\n\
+	bne	2f\n"
+	PPC405_ERR77(0,%1)
+"	stwcx.	%2,0,%1\n\
+	bne-	1b\n\
+	isync\n\
+2:"
+	: "=&r"(ret)
+	: "r"(lock), "r"(1)
+	: "cr0", "memory");
+
+	return ret;
+}
+
+void _raw_spin_lock(spinlock_t *lock)
+{
+	int cpu = smp_processor_id();
+	unsigned int stuck = INIT_STUCK;
+	while (__spin_trylock(&lock->lock)) {
+		while ((unsigned volatile long)lock->lock != 0) {
+			if (!--stuck) {
+				printk("_spin_lock(%p) CPU#%d NIP %p"
+				       " holder: cpu %ld pc %08lX\n",
+				       lock, cpu, __builtin_return_address(0),
+				       lock->owner_cpu,lock->owner_pc);
+				stuck = INIT_STUCK;
+				/* steal the lock */
+				/*xchg_u32((void *)&lock->lock,0);*/
+			}
+		}
+	}
+	lock->owner_pc = (unsigned long)__builtin_return_address(0);
+	lock->owner_cpu = cpu;
+}
+EXPORT_SYMBOL(_raw_spin_lock);
+
+int _raw_spin_trylock(spinlock_t *lock)
+{
+	if (__spin_trylock(&lock->lock))
+		return 0;
+	lock->owner_cpu = smp_processor_id();
+	lock->owner_pc = (unsigned long)__builtin_return_address(0);
+	return 1;
+}
+EXPORT_SYMBOL(_raw_spin_trylock);
+
+void _raw_spin_unlock(spinlock_t *lp)
+{
+  	if ( !lp->lock )
+		printk("_spin_unlock(%p): no lock cpu %d curr PC %p %s/%d\n",
+		       lp, smp_processor_id(), __builtin_return_address(0),
+		       current->comm, current->pid);
+	if ( lp->owner_cpu != smp_processor_id() )
+		printk("_spin_unlock(%p): cpu %d trying clear of cpu %d pc %lx val %lx\n",
+		      lp, smp_processor_id(), (int)lp->owner_cpu,
+		      lp->owner_pc,lp->lock);
+	lp->owner_pc = lp->owner_cpu = 0;
+	wmb();
+	lp->lock = 0;
+}
+EXPORT_SYMBOL(_raw_spin_unlock);
+
+/*
+ * For rwlocks, zero is unlocked, -1 is write-locked,
+ * positive is read-locked.
+ */
+static __inline__ int __read_trylock(rwlock_t *rw)
+{
+	signed int tmp;
+
+	__asm__ __volatile__(
+"2:	lwarx	%0,0,%1		# __read_trylock\n\
+	addic.	%0,%0,1\n\
+	ble-	1f\n"
+	PPC405_ERR77(0,%1)
+"	stwcx.	%0,0,%1\n\
+	bne-	2b\n\
+	isync\n\
+1:"
+	: "=&r"(tmp)
+	: "r"(&rw->lock)
+	: "cr0", "memory");
+
+	return tmp;
+}
+
+int _raw_read_trylock(rwlock_t *rw)
+{
+	return __read_trylock(rw) > 0;
+}
+EXPORT_SYMBOL(_raw_read_trylock);
+
+void _raw_read_lock(rwlock_t *rw)
+{
+	unsigned int stuck;
+
+	while (__read_trylock(rw) <= 0) {
+		stuck = INIT_STUCK;
+		while (!read_can_lock(rw)) {
+			if (--stuck == 0) {
+				printk("_read_lock(%p) CPU#%d lock %d\n",
+				       rw, _smp_processor_id(), rw->lock);
+				stuck = INIT_STUCK;
+			}
+		}
+	}
+}
+EXPORT_SYMBOL(_raw_read_lock);
+
+void _raw_read_unlock(rwlock_t *rw)
+{
+	if ( rw->lock == 0 )
+		printk("_read_unlock(): %s/%d (nip %08lX) lock %d\n",
+		       current->comm,current->pid,current->thread.regs->nip,
+		      rw->lock);
+	wmb();
+	atomic_dec((atomic_t *) &(rw)->lock);
+}
+EXPORT_SYMBOL(_raw_read_unlock);
+
+void _raw_write_lock(rwlock_t *rw)
+{
+	unsigned int stuck;
+
+	while (cmpxchg(&rw->lock, 0, -1) != 0) {
+		stuck = INIT_STUCK;
+		while (!write_can_lock(rw)) {
+			if (--stuck == 0) {
+				printk("write_lock(%p) CPU#%d lock %d)\n",
+				       rw, _smp_processor_id(), rw->lock);
+				stuck = INIT_STUCK;
+			}
+		}
+	}
+	wmb();
+}
+EXPORT_SYMBOL(_raw_write_lock);
+
+int _raw_write_trylock(rwlock_t *rw)
+{
+	if (cmpxchg(&rw->lock, 0, -1) != 0)
+		return 0;
+	wmb();
+	return 1;
+}
+EXPORT_SYMBOL(_raw_write_trylock);
+
+void _raw_write_unlock(rwlock_t *rw)
+{
+	if (rw->lock >= 0)
+		printk("_write_lock(): %s/%d (nip %08lX) lock %d\n",
+		      current->comm,current->pid,current->thread.regs->nip,
+		      rw->lock);
+	wmb();
+	rw->lock = 0;
+}
+EXPORT_SYMBOL(_raw_write_unlock);
+
+#endif
diff --git a/arch/ppc/lib/rheap.c b/arch/ppc/lib/rheap.c
new file mode 100644
index 000000000000..42c5de2c898f
--- /dev/null
+++ b/arch/ppc/lib/rheap.c
@@ -0,0 +1,693 @@
+/*
+ * arch/ppc/syslib/rheap.c
+ *
+ * A Remote Heap.  Remote means that we don't touch the memory that the
+ * heap points to. Normal heap implementations use the memory they manage
+ * to place their list. We cannot do that because the memory we manage may
+ * have special properties, for example it is uncachable or of different
+ * endianess.
+ *
+ * Author: Pantelis Antoniou <panto@intracom.gr>
+ *
+ * 2004 (c) INTRACOM S.A. Greece. This file is licensed under
+ * the terms of the GNU General Public License version 2. This program
+ * is licensed "as is" without any warranty of any kind, whether express
+ * or implied.
+ */
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+
+#include <asm/rheap.h>
+
+/*
+ * Fixup a list_head, needed when copying lists.  If the pointers fall
+ * between s and e, apply the delta.  This assumes that
+ * sizeof(struct list_head *) == sizeof(unsigned long *).
+ */
+static inline void fixup(unsigned long s, unsigned long e, int d,
+			 struct list_head *l)
+{
+	unsigned long *pp;
+
+	pp = (unsigned long *)&l->next;
+	if (*pp >= s && *pp < e)
+		*pp += d;
+
+	pp = (unsigned long *)&l->prev;
+	if (*pp >= s && *pp < e)
+		*pp += d;
+}
+
+/* Grow the allocated blocks */
+static int grow(rh_info_t * info, int max_blocks)
+{
+	rh_block_t *block, *blk;
+	int i, new_blocks;
+	int delta;
+	unsigned long blks, blke;
+
+	if (max_blocks <= info->max_blocks)
+		return -EINVAL;
+
+	new_blocks = max_blocks - info->max_blocks;
+
+	block = kmalloc(sizeof(rh_block_t) * max_blocks, GFP_KERNEL);
+	if (block == NULL)
+		return -ENOMEM;
+
+	if (info->max_blocks > 0) {
+
+		/* copy old block area */
+		memcpy(block, info->block,
+		       sizeof(rh_block_t) * info->max_blocks);
+
+		delta = (char *)block - (char *)info->block;
+
+		/* and fixup list pointers */
+		blks = (unsigned long)info->block;
+		blke = (unsigned long)(info->block + info->max_blocks);
+
+		for (i = 0, blk = block; i < info->max_blocks; i++, blk++)
+			fixup(blks, blke, delta, &blk->list);
+
+		fixup(blks, blke, delta, &info->empty_list);
+		fixup(blks, blke, delta, &info->free_list);
+		fixup(blks, blke, delta, &info->taken_list);
+
+		/* free the old allocated memory */
+		if ((info->flags & RHIF_STATIC_BLOCK) == 0)
+			kfree(info->block);
+	}
+
+	info->block = block;
+	info->empty_slots += new_blocks;
+	info->max_blocks = max_blocks;
+	info->flags &= ~RHIF_STATIC_BLOCK;
+
+	/* add all new blocks to the free list */
+	for (i = 0, blk = block + info->max_blocks; i < new_blocks; i++, blk++)
+		list_add(&blk->list, &info->empty_list);
+
+	return 0;
+}
+
+/*
+ * Assure at least the required amount of empty slots.  If this function
+ * causes a grow in the block area then all pointers kept to the block
+ * area are invalid!
+ */
+static int assure_empty(rh_info_t * info, int slots)
+{
+	int max_blocks;
+
+	/* This function is not meant to be used to grow uncontrollably */
+	if (slots >= 4)
+		return -EINVAL;
+
+	/* Enough space */
+	if (info->empty_slots >= slots)
+		return 0;
+
+	/* Next 16 sized block */
+	max_blocks = ((info->max_blocks + slots) + 15) & ~15;
+
+	return grow(info, max_blocks);
+}
+
+static rh_block_t *get_slot(rh_info_t * info)
+{
+	rh_block_t *blk;
+
+	/* If no more free slots, and failure to extend. */
+	/* XXX: You should have called assure_empty before */
+	if (info->empty_slots == 0) {
+		printk(KERN_ERR "rh: out of slots; crash is imminent.\n");
+		return NULL;
+	}
+
+	/* Get empty slot to use */
+	blk = list_entry(info->empty_list.next, rh_block_t, list);
+	list_del_init(&blk->list);
+	info->empty_slots--;
+
+	/* Initialize */
+	blk->start = NULL;
+	blk->size = 0;
+	blk->owner = NULL;
+
+	return blk;
+}
+
+static inline void release_slot(rh_info_t * info, rh_block_t * blk)
+{
+	list_add(&blk->list, &info->empty_list);
+	info->empty_slots++;
+}
+
+static void attach_free_block(rh_info_t * info, rh_block_t * blkn)
+{
+	rh_block_t *blk;
+	rh_block_t *before;
+	rh_block_t *after;
+	rh_block_t *next;
+	int size;
+	unsigned long s, e, bs, be;
+	struct list_head *l;
+
+	/* We assume that they are aligned properly */
+	size = blkn->size;
+	s = (unsigned long)blkn->start;
+	e = s + size;
+
+	/* Find the blocks immediately before and after the given one
+	 * (if any) */
+	before = NULL;
+	after = NULL;
+	next = NULL;
+
+	list_for_each(l, &info->free_list) {
+		blk = list_entry(l, rh_block_t, list);
+
+		bs = (unsigned long)blk->start;
+		be = bs + blk->size;
+
+		if (next == NULL && s >= bs)
+			next = blk;
+
+		if (be == s)
+			before = blk;
+
+		if (e == bs)
+			after = blk;
+
+		/* If both are not null, break now */
+		if (before != NULL && after != NULL)
+			break;
+	}
+
+	/* Now check if they are really adjacent */
+	if (before != NULL && s != (unsigned long)before->start + before->size)
+		before = NULL;
+
+	if (after != NULL && e != (unsigned long)after->start)
+		after = NULL;
+
+	/* No coalescing; list insert and return */
+	if (before == NULL && after == NULL) {
+
+		if (next != NULL)
+			list_add(&blkn->list, &next->list);
+		else
+			list_add(&blkn->list, &info->free_list);
+
+		return;
+	}
+
+	/* We don't need it anymore */
+	release_slot(info, blkn);
+
+	/* Grow the before block */
+	if (before != NULL && after == NULL) {
+		before->size += size;
+		return;
+	}
+
+	/* Grow the after block backwards */
+	if (before == NULL && after != NULL) {
+		after->start = (int8_t *)after->start - size;
+		after->size += size;
+		return;
+	}
+
+	/* Grow the before block, and release the after block */
+	before->size += size + after->size;
+	list_del(&after->list);
+	release_slot(info, after);
+}
+
+static void attach_taken_block(rh_info_t * info, rh_block_t * blkn)
+{
+	rh_block_t *blk;
+	struct list_head *l;
+
+	/* Find the block immediately before the given one (if any) */
+	list_for_each(l, &info->taken_list) {
+		blk = list_entry(l, rh_block_t, list);
+		if (blk->start > blkn->start) {
+			list_add_tail(&blkn->list, &blk->list);
+			return;
+		}
+	}
+
+	list_add_tail(&blkn->list, &info->taken_list);
+}
+
+/*
+ * Create a remote heap dynamically.  Note that no memory for the blocks
+ * are allocated.  It will upon the first allocation
+ */
+rh_info_t *rh_create(unsigned int alignment)
+{
+	rh_info_t *info;
+
+	/* Alignment must be a power of two */
+	if ((alignment & (alignment - 1)) != 0)
+		return ERR_PTR(-EINVAL);
+
+	info = kmalloc(sizeof(*info), GFP_KERNEL);
+	if (info == NULL)
+		return ERR_PTR(-ENOMEM);
+
+	info->alignment = alignment;
+
+	/* Initially everything as empty */
+	info->block = NULL;
+	info->max_blocks = 0;
+	info->empty_slots = 0;
+	info->flags = 0;
+
+	INIT_LIST_HEAD(&info->empty_list);
+	INIT_LIST_HEAD(&info->free_list);
+	INIT_LIST_HEAD(&info->taken_list);
+
+	return info;
+}
+
+/*
+ * Destroy a dynamically created remote heap.  Deallocate only if the areas
+ * are not static
+ */
+void rh_destroy(rh_info_t * info)
+{
+	if ((info->flags & RHIF_STATIC_BLOCK) == 0 && info->block != NULL)
+		kfree(info->block);
+
+	if ((info->flags & RHIF_STATIC_INFO) == 0)
+		kfree(info);
+}
+
+/*
+ * Initialize in place a remote heap info block.  This is needed to support
+ * operation very early in the startup of the kernel, when it is not yet safe
+ * to call kmalloc.
+ */
+void rh_init(rh_info_t * info, unsigned int alignment, int max_blocks,
+	     rh_block_t * block)
+{
+	int i;
+	rh_block_t *blk;
+
+	/* Alignment must be a power of two */
+	if ((alignment & (alignment - 1)) != 0)
+		return;
+
+	info->alignment = alignment;
+
+	/* Initially everything as empty */
+	info->block = block;
+	info->max_blocks = max_blocks;
+	info->empty_slots = max_blocks;
+	info->flags = RHIF_STATIC_INFO | RHIF_STATIC_BLOCK;
+
+	INIT_LIST_HEAD(&info->empty_list);
+	INIT_LIST_HEAD(&info->free_list);
+	INIT_LIST_HEAD(&info->taken_list);
+
+	/* Add all new blocks to the free list */
+	for (i = 0, blk = block; i < max_blocks; i++, blk++)
+		list_add(&blk->list, &info->empty_list);
+}
+
+/* Attach a free memory region, coalesces regions if adjuscent */
+int rh_attach_region(rh_info_t * info, void *start, int size)
+{
+	rh_block_t *blk;
+	unsigned long s, e, m;
+	int r;
+
+	/* The region must be aligned */
+	s = (unsigned long)start;
+	e = s + size;
+	m = info->alignment - 1;
+
+	/* Round start up */
+	s = (s + m) & ~m;
+
+	/* Round end down */
+	e = e & ~m;
+
+	/* Take final values */
+	start = (void *)s;
+	size = (int)(e - s);
+
+	/* Grow the blocks, if needed */
+	r = assure_empty(info, 1);
+	if (r < 0)
+		return r;
+
+	blk = get_slot(info);
+	blk->start = start;
+	blk->size = size;
+	blk->owner = NULL;
+
+	attach_free_block(info, blk);
+
+	return 0;
+}
+
+/* Detatch given address range, splits free block if needed. */
+void *rh_detach_region(rh_info_t * info, void *start, int size)
+{
+	struct list_head *l;
+	rh_block_t *blk, *newblk;
+	unsigned long s, e, m, bs, be;
+
+	/* Validate size */
+	if (size <= 0)
+		return ERR_PTR(-EINVAL);
+
+	/* The region must be aligned */
+	s = (unsigned long)start;
+	e = s + size;
+	m = info->alignment - 1;
+
+	/* Round start up */
+	s = (s + m) & ~m;
+
+	/* Round end down */
+	e = e & ~m;
+
+	if (assure_empty(info, 1) < 0)
+		return ERR_PTR(-ENOMEM);
+
+	blk = NULL;
+	list_for_each(l, &info->free_list) {
+		blk = list_entry(l, rh_block_t, list);
+		/* The range must lie entirely inside one free block */
+		bs = (unsigned long)blk->start;
+		be = (unsigned long)blk->start + blk->size;
+		if (s >= bs && e <= be)
+			break;
+		blk = NULL;
+	}
+
+	if (blk == NULL)
+		return ERR_PTR(-ENOMEM);
+
+	/* Perfect fit */
+	if (bs == s && be == e) {
+		/* Delete from free list, release slot */
+		list_del(&blk->list);
+		release_slot(info, blk);
+		return (void *)s;
+	}
+
+	/* blk still in free list, with updated start and/or size */
+	if (bs == s || be == e) {
+		if (bs == s)
+			blk->start = (int8_t *)blk->start + size;
+		blk->size -= size;
+
+	} else {
+		/* The front free fragment */
+		blk->size = s - bs;
+
+		/* the back free fragment */
+		newblk = get_slot(info);
+		newblk->start = (void *)e;
+		newblk->size = be - e;
+
+		list_add(&newblk->list, &blk->list);
+	}
+
+	return (void *)s;
+}
+
+void *rh_alloc(rh_info_t * info, int size, const char *owner)
+{
+	struct list_head *l;
+	rh_block_t *blk;
+	rh_block_t *newblk;
+	void *start;
+
+	/* Validate size */
+	if (size <= 0)
+		return ERR_PTR(-EINVAL);
+
+	/* Align to configured alignment */
+	size = (size + (info->alignment - 1)) & ~(info->alignment - 1);
+
+	if (assure_empty(info, 1) < 0)
+		return ERR_PTR(-ENOMEM);
+
+	blk = NULL;
+	list_for_each(l, &info->free_list) {
+		blk = list_entry(l, rh_block_t, list);
+		if (size <= blk->size)
+			break;
+		blk = NULL;
+	}
+
+	if (blk == NULL)
+		return ERR_PTR(-ENOMEM);
+
+	/* Just fits */
+	if (blk->size == size) {
+		/* Move from free list to taken list */
+		list_del(&blk->list);
+		blk->owner = owner;
+		start = blk->start;
+
+		attach_taken_block(info, blk);
+
+		return start;
+	}
+
+	newblk = get_slot(info);
+	newblk->start = blk->start;
+	newblk->size = size;
+	newblk->owner = owner;
+
+	/* blk still in free list, with updated start, size */
+	blk->start = (int8_t *)blk->start + size;
+	blk->size -= size;
+
+	start = newblk->start;
+
+	attach_taken_block(info, newblk);
+
+	return start;
+}
+
+/* allocate at precisely the given address */
+void *rh_alloc_fixed(rh_info_t * info, void *start, int size, const char *owner)
+{
+	struct list_head *l;
+	rh_block_t *blk, *newblk1, *newblk2;
+	unsigned long s, e, m, bs, be;
+
+	/* Validate size */
+	if (size <= 0)
+		return ERR_PTR(-EINVAL);
+
+	/* The region must be aligned */
+	s = (unsigned long)start;
+	e = s + size;
+	m = info->alignment - 1;
+
+	/* Round start up */
+	s = (s + m) & ~m;
+
+	/* Round end down */
+	e = e & ~m;
+
+	if (assure_empty(info, 2) < 0)
+		return ERR_PTR(-ENOMEM);
+
+	blk = NULL;
+	list_for_each(l, &info->free_list) {
+		blk = list_entry(l, rh_block_t, list);
+		/* The range must lie entirely inside one free block */
+		bs = (unsigned long)blk->start;
+		be = (unsigned long)blk->start + blk->size;
+		if (s >= bs && e <= be)
+			break;
+	}
+
+	if (blk == NULL)
+		return ERR_PTR(-ENOMEM);
+
+	/* Perfect fit */
+	if (bs == s && be == e) {
+		/* Move from free list to taken list */
+		list_del(&blk->list);
+		blk->owner = owner;
+
+		start = blk->start;
+		attach_taken_block(info, blk);
+
+		return start;
+
+	}
+
+	/* blk still in free list, with updated start and/or size */
+	if (bs == s || be == e) {
+		if (bs == s)
+			blk->start = (int8_t *)blk->start + size;
+		blk->size -= size;
+
+	} else {
+		/* The front free fragment */
+		blk->size = s - bs;
+
+		/* The back free fragment */
+		newblk2 = get_slot(info);
+		newblk2->start = (void *)e;
+		newblk2->size = be - e;
+
+		list_add(&newblk2->list, &blk->list);
+	}
+
+	newblk1 = get_slot(info);
+	newblk1->start = (void *)s;
+	newblk1->size = e - s;
+	newblk1->owner = owner;
+
+	start = newblk1->start;
+	attach_taken_block(info, newblk1);
+
+	return start;
+}
+
+int rh_free(rh_info_t * info, void *start)
+{
+	rh_block_t *blk, *blk2;
+	struct list_head *l;
+	int size;
+
+	/* Linear search for block */
+	blk = NULL;
+	list_for_each(l, &info->taken_list) {
+		blk2 = list_entry(l, rh_block_t, list);
+		if (start < blk2->start)
+			break;
+		blk = blk2;
+	}
+
+	if (blk == NULL || start > (blk->start + blk->size))
+		return -EINVAL;
+
+	/* Remove from taken list */
+	list_del(&blk->list);
+
+	/* Get size of freed block */
+	size = blk->size;
+	attach_free_block(info, blk);
+
+	return size;
+}
+
+int rh_get_stats(rh_info_t * info, int what, int max_stats, rh_stats_t * stats)
+{
+	rh_block_t *blk;
+	struct list_head *l;
+	struct list_head *h;
+	int nr;
+
+	switch (what) {
+
+	case RHGS_FREE:
+		h = &info->free_list;
+		break;
+
+	case RHGS_TAKEN:
+		h = &info->taken_list;
+		break;
+
+	default:
+		return -EINVAL;
+	}
+
+	/* Linear search for block */
+	nr = 0;
+	list_for_each(l, h) {
+		blk = list_entry(l, rh_block_t, list);
+		if (stats != NULL && nr < max_stats) {
+			stats->start = blk->start;
+			stats->size = blk->size;
+			stats->owner = blk->owner;
+			stats++;
+		}
+		nr++;
+	}
+
+	return nr;
+}
+
+int rh_set_owner(rh_info_t * info, void *start, const char *owner)
+{
+	rh_block_t *blk, *blk2;
+	struct list_head *l;
+	int size;
+
+	/* Linear search for block */
+	blk = NULL;
+	list_for_each(l, &info->taken_list) {
+		blk2 = list_entry(l, rh_block_t, list);
+		if (start < blk2->start)
+			break;
+		blk = blk2;
+	}
+
+	if (blk == NULL || start > (blk->start + blk->size))
+		return -EINVAL;
+
+	blk->owner = owner;
+	size = blk->size;
+
+	return size;
+}
+
+void rh_dump(rh_info_t * info)
+{
+	static rh_stats_t st[32];	/* XXX maximum 32 blocks */
+	int maxnr;
+	int i, nr;
+
+	maxnr = sizeof(st) / sizeof(st[0]);
+
+	printk(KERN_INFO
+	       "info @0x%p (%d slots empty / %d max)\n",
+	       info, info->empty_slots, info->max_blocks);
+
+	printk(KERN_INFO "  Free:\n");
+	nr = rh_get_stats(info, RHGS_FREE, maxnr, st);
+	if (nr > maxnr)
+		nr = maxnr;
+	for (i = 0; i < nr; i++)
+		printk(KERN_INFO
+		       "    0x%p-0x%p (%u)\n",
+		       st[i].start, (int8_t *) st[i].start + st[i].size,
+		       st[i].size);
+	printk(KERN_INFO "\n");
+
+	printk(KERN_INFO "  Taken:\n");
+	nr = rh_get_stats(info, RHGS_TAKEN, maxnr, st);
+	if (nr > maxnr)
+		nr = maxnr;
+	for (i = 0; i < nr; i++)
+		printk(KERN_INFO
+		       "    0x%p-0x%p (%u) %s\n",
+		       st[i].start, (int8_t *) st[i].start + st[i].size,
+		       st[i].size, st[i].owner != NULL ? st[i].owner : "");
+	printk(KERN_INFO "\n");
+}
+
+void rh_dump_blk(rh_info_t * info, rh_block_t * blk)
+{
+	printk(KERN_INFO
+	       "blk @0x%p: 0x%p-0x%p (%u)\n",
+	       blk, blk->start, (int8_t *) blk->start + blk->size, blk->size);
+}
diff --git a/arch/ppc/lib/strcase.c b/arch/ppc/lib/strcase.c
new file mode 100644
index 000000000000..36b521091bbc
--- /dev/null
+++ b/arch/ppc/lib/strcase.c
@@ -0,0 +1,23 @@
+#include <linux/ctype.h>
+
+int strcasecmp(const char *s1, const char *s2)
+{
+	int c1, c2;
+
+	do {
+		c1 = tolower(*s1++);
+		c2 = tolower(*s2++);
+	} while (c1 == c2 && c1 != 0);
+	return c1 - c2;
+}
+
+int strncasecmp(const char *s1, const char *s2, int n)
+{
+	int c1, c2;
+
+	do {
+		c1 = tolower(*s1++);
+		c2 = tolower(*s2++);
+	} while ((--n > 0) && c1 == c2 && c1 != 0);
+	return c1 - c2;
+}
diff --git a/arch/ppc/lib/string.S b/arch/ppc/lib/string.S
new file mode 100644
index 000000000000..8d08a2eb225e
--- /dev/null
+++ b/arch/ppc/lib/string.S
@@ -0,0 +1,716 @@
+/*
+ * String handling functions for PowerPC.
+ *
+ * Copyright (C) 1996 Paul Mackerras.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <linux/config.h>
+#include <asm/processor.h>
+#include <asm/cache.h>
+#include <asm/errno.h>
+#include <asm/ppc_asm.h>
+
+#define COPY_16_BYTES		\
+	lwz	r7,4(r4);	\
+	lwz	r8,8(r4);	\
+	lwz	r9,12(r4);	\
+	lwzu	r10,16(r4);	\
+	stw	r7,4(r6);	\
+	stw	r8,8(r6);	\
+	stw	r9,12(r6);	\
+	stwu	r10,16(r6)
+
+#define COPY_16_BYTES_WITHEX(n)	\
+8 ## n ## 0:			\
+	lwz	r7,4(r4);	\
+8 ## n ## 1:			\
+	lwz	r8,8(r4);	\
+8 ## n ## 2:			\
+	lwz	r9,12(r4);	\
+8 ## n ## 3:			\
+	lwzu	r10,16(r4);	\
+8 ## n ## 4:			\
+	stw	r7,4(r6);	\
+8 ## n ## 5:			\
+	stw	r8,8(r6);	\
+8 ## n ## 6:			\
+	stw	r9,12(r6);	\
+8 ## n ## 7:			\
+	stwu	r10,16(r6)
+
+#define COPY_16_BYTES_EXCODE(n)			\
+9 ## n ## 0:					\
+	addi	r5,r5,-(16 * n);		\
+	b	104f;				\
+9 ## n ## 1:					\
+	addi	r5,r5,-(16 * n);		\
+	b	105f;				\
+.section __ex_table,"a";			\
+	.align	2;				\
+	.long	8 ## n ## 0b,9 ## n ## 0b;	\
+	.long	8 ## n ## 1b,9 ## n ## 0b;	\
+	.long	8 ## n ## 2b,9 ## n ## 0b;	\
+	.long	8 ## n ## 3b,9 ## n ## 0b;	\
+	.long	8 ## n ## 4b,9 ## n ## 1b;	\
+	.long	8 ## n ## 5b,9 ## n ## 1b;	\
+	.long	8 ## n ## 6b,9 ## n ## 1b;	\
+	.long	8 ## n ## 7b,9 ## n ## 1b;	\
+	.text
+
+	.text
+	.stabs	"arch/ppc/lib/",N_SO,0,0,0f
+	.stabs	"string.S",N_SO,0,0,0f
+
+CACHELINE_BYTES = L1_CACHE_LINE_SIZE
+LG_CACHELINE_BYTES = LG_L1_CACHE_LINE_SIZE
+CACHELINE_MASK = (L1_CACHE_LINE_SIZE-1)
+
+_GLOBAL(strcpy)
+	addi	r5,r3,-1
+	addi	r4,r4,-1
+1:	lbzu	r0,1(r4)
+	cmpwi	0,r0,0
+	stbu	r0,1(r5)
+	bne	1b
+	blr
+
+/* This clears out any unused part of the destination buffer,
+   just as the libc version does.  -- paulus */
+_GLOBAL(strncpy)
+	cmpwi	0,r5,0
+	beqlr
+	mtctr	r5
+	addi	r6,r3,-1
+	addi	r4,r4,-1
+1:	lbzu	r0,1(r4)
+	cmpwi	0,r0,0
+	stbu	r0,1(r6)
+	bdnzf	2,1b		/* dec ctr, branch if ctr != 0 && !cr0.eq */
+	bnelr			/* if we didn't hit a null char, we're done */
+	mfctr	r5
+	cmpwi	0,r5,0		/* any space left in destination buffer? */
+	beqlr			/* we know r0 == 0 here */
+2:	stbu	r0,1(r6)	/* clear it out if so */
+	bdnz	2b
+	blr
+
+_GLOBAL(strcat)
+	addi	r5,r3,-1
+	addi	r4,r4,-1
+1:	lbzu	r0,1(r5)
+	cmpwi	0,r0,0
+	bne	1b
+	addi	r5,r5,-1
+1:	lbzu	r0,1(r4)
+	cmpwi	0,r0,0
+	stbu	r0,1(r5)
+	bne	1b
+	blr
+
+_GLOBAL(strcmp)
+	addi	r5,r3,-1
+	addi	r4,r4,-1
+1:	lbzu	r3,1(r5)
+	cmpwi	1,r3,0
+	lbzu	r0,1(r4)
+	subf.	r3,r0,r3
+	beqlr	1
+	beq	1b
+	blr
+
+_GLOBAL(strlen)
+	addi	r4,r3,-1
+1:	lbzu	r0,1(r4)
+	cmpwi	0,r0,0
+	bne	1b
+	subf	r3,r3,r4
+	blr
+
+/*
+ * Use dcbz on the complete cache lines in the destination
+ * to set them to zero.  This requires that the destination
+ * area is cacheable.  -- paulus
+ */
+_GLOBAL(cacheable_memzero)
+	mr	r5,r4
+	li	r4,0
+	addi	r6,r3,-4
+	cmplwi	0,r5,4
+	blt	7f
+	stwu	r4,4(r6)
+	beqlr
+	andi.	r0,r6,3
+	add	r5,r0,r5
+	subf	r6,r0,r6
+	clrlwi	r7,r6,32-LG_CACHELINE_BYTES
+	add	r8,r7,r5
+	srwi	r9,r8,LG_CACHELINE_BYTES
+	addic.	r9,r9,-1	/* total number of complete cachelines */
+	ble	2f
+	xori	r0,r7,CACHELINE_MASK & ~3
+	srwi.	r0,r0,2
+	beq	3f
+	mtctr	r0
+4:	stwu	r4,4(r6)
+	bdnz	4b
+3:	mtctr	r9
+	li	r7,4
+#if !defined(CONFIG_8xx)
+10:	dcbz	r7,r6
+#else
+10:	stw	r4, 4(r6)
+	stw	r4, 8(r6)
+	stw	r4, 12(r6)
+	stw	r4, 16(r6)
+#if CACHE_LINE_SIZE >= 32
+	stw	r4, 20(r6)
+	stw	r4, 24(r6)
+	stw	r4, 28(r6)
+	stw	r4, 32(r6)
+#endif /* CACHE_LINE_SIZE */
+#endif
+	addi	r6,r6,CACHELINE_BYTES
+	bdnz	10b
+	clrlwi	r5,r8,32-LG_CACHELINE_BYTES
+	addi	r5,r5,4
+2:	srwi	r0,r5,2
+	mtctr	r0
+	bdz	6f
+1:	stwu	r4,4(r6)
+	bdnz	1b
+6:	andi.	r5,r5,3
+7:	cmpwi	0,r5,0
+	beqlr
+	mtctr	r5
+	addi	r6,r6,3
+8:	stbu	r4,1(r6)
+	bdnz	8b
+	blr
+
+_GLOBAL(memset)
+	rlwimi	r4,r4,8,16,23
+	rlwimi	r4,r4,16,0,15
+	addi	r6,r3,-4
+	cmplwi	0,r5,4
+	blt	7f
+	stwu	r4,4(r6)
+	beqlr
+	andi.	r0,r6,3
+	add	r5,r0,r5
+	subf	r6,r0,r6
+	srwi	r0,r5,2
+	mtctr	r0
+	bdz	6f
+1:	stwu	r4,4(r6)
+	bdnz	1b
+6:	andi.	r5,r5,3
+7:	cmpwi	0,r5,0
+	beqlr
+	mtctr	r5
+	addi	r6,r6,3
+8:	stbu	r4,1(r6)
+	bdnz	8b
+	blr
+
+/*
+ * This version uses dcbz on the complete cache lines in the
+ * destination area to reduce memory traffic.  This requires that
+ * the destination area is cacheable.
+ * We only use this version if the source and dest don't overlap.
+ * -- paulus.
+ */
+_GLOBAL(cacheable_memcpy)
+	add	r7,r3,r5		/* test if the src & dst overlap */
+	add	r8,r4,r5
+	cmplw	0,r4,r7
+	cmplw	1,r3,r8
+	crand	0,0,4			/* cr0.lt &= cr1.lt */
+	blt	memcpy			/* if regions overlap */
+
+	addi	r4,r4,-4
+	addi	r6,r3,-4
+	neg	r0,r3
+	andi.	r0,r0,CACHELINE_MASK	/* # bytes to start of cache line */
+	beq	58f
+
+	cmplw	0,r5,r0			/* is this more than total to do? */
+	blt	63f			/* if not much to do */
+	andi.	r8,r0,3			/* get it word-aligned first */
+	subf	r5,r0,r5
+	mtctr	r8
+	beq+	61f
+70:	lbz	r9,4(r4)		/* do some bytes */
+	stb	r9,4(r6)
+	addi	r4,r4,1
+	addi	r6,r6,1
+	bdnz	70b
+61:	srwi.	r0,r0,2
+	mtctr	r0
+	beq	58f
+72:	lwzu	r9,4(r4)		/* do some words */
+	stwu	r9,4(r6)
+	bdnz	72b
+
+58:	srwi.	r0,r5,LG_CACHELINE_BYTES /* # complete cachelines */
+	clrlwi	r5,r5,32-LG_CACHELINE_BYTES
+	li	r11,4
+	mtctr	r0
+	beq	63f
+53:
+#if !defined(CONFIG_8xx)
+	dcbz	r11,r6
+#endif
+	COPY_16_BYTES
+#if L1_CACHE_LINE_SIZE >= 32
+	COPY_16_BYTES
+#if L1_CACHE_LINE_SIZE >= 64
+	COPY_16_BYTES
+	COPY_16_BYTES
+#if L1_CACHE_LINE_SIZE >= 128
+	COPY_16_BYTES
+	COPY_16_BYTES
+	COPY_16_BYTES
+	COPY_16_BYTES
+#endif
+#endif
+#endif
+	bdnz	53b
+
+63:	srwi.	r0,r5,2
+	mtctr	r0
+	beq	64f
+30:	lwzu	r0,4(r4)
+	stwu	r0,4(r6)
+	bdnz	30b
+
+64:	andi.	r0,r5,3
+	mtctr	r0
+	beq+	65f
+40:	lbz	r0,4(r4)
+	stb	r0,4(r6)
+	addi	r4,r4,1
+	addi	r6,r6,1
+	bdnz	40b
+65:	blr
+
+_GLOBAL(memmove)
+	cmplw	0,r3,r4
+	bgt	backwards_memcpy
+	/* fall through */
+
+_GLOBAL(memcpy)
+	srwi.	r7,r5,3
+	addi	r6,r3,-4
+	addi	r4,r4,-4
+	beq	2f			/* if less than 8 bytes to do */
+	andi.	r0,r6,3			/* get dest word aligned */
+	mtctr	r7
+	bne	5f
+1:	lwz	r7,4(r4)
+	lwzu	r8,8(r4)
+	stw	r7,4(r6)
+	stwu	r8,8(r6)
+	bdnz	1b
+	andi.	r5,r5,7
+2:	cmplwi	0,r5,4
+	blt	3f
+	lwzu	r0,4(r4)
+	addi	r5,r5,-4
+	stwu	r0,4(r6)
+3:	cmpwi	0,r5,0
+	beqlr
+	mtctr	r5
+	addi	r4,r4,3
+	addi	r6,r6,3
+4:	lbzu	r0,1(r4)
+	stbu	r0,1(r6)
+	bdnz	4b
+	blr
+5:	subfic	r0,r0,4
+	mtctr	r0
+6:	lbz	r7,4(r4)
+	addi	r4,r4,1
+	stb	r7,4(r6)
+	addi	r6,r6,1
+	bdnz	6b
+	subf	r5,r0,r5
+	rlwinm.	r7,r5,32-3,3,31
+	beq	2b
+	mtctr	r7
+	b	1b
+
+_GLOBAL(backwards_memcpy)
+	rlwinm.	r7,r5,32-3,3,31		/* r0 = r5 >> 3 */
+	add	r6,r3,r5
+	add	r4,r4,r5
+	beq	2f
+	andi.	r0,r6,3
+	mtctr	r7
+	bne	5f
+1:	lwz	r7,-4(r4)
+	lwzu	r8,-8(r4)
+	stw	r7,-4(r6)
+	stwu	r8,-8(r6)
+	bdnz	1b
+	andi.	r5,r5,7
+2:	cmplwi	0,r5,4
+	blt	3f
+	lwzu	r0,-4(r4)
+	subi	r5,r5,4
+	stwu	r0,-4(r6)
+3:	cmpwi	0,r5,0
+	beqlr
+	mtctr	r5
+4:	lbzu	r0,-1(r4)
+	stbu	r0,-1(r6)
+	bdnz	4b
+	blr
+5:	mtctr	r0
+6:	lbzu	r7,-1(r4)
+	stbu	r7,-1(r6)
+	bdnz	6b
+	subf	r5,r0,r5
+	rlwinm.	r7,r5,32-3,3,31
+	beq	2b
+	mtctr	r7
+	b	1b
+
+_GLOBAL(memcmp)
+	cmpwi	0,r5,0
+	ble-	2f
+	mtctr	r5
+	addi	r6,r3,-1
+	addi	r4,r4,-1
+1:	lbzu	r3,1(r6)
+	lbzu	r0,1(r4)
+	subf.	r3,r0,r3
+	bdnzt	2,1b
+	blr
+2:	li	r3,0
+	blr
+
+_GLOBAL(memchr)
+	cmpwi	0,r5,0
+	ble-	2f
+	mtctr	r5
+	addi	r3,r3,-1
+1:	lbzu	r0,1(r3)
+	cmpw	0,r0,r4
+	bdnzf	2,1b
+	beqlr
+2:	li	r3,0
+	blr
+
+_GLOBAL(__copy_tofrom_user)
+	addi	r4,r4,-4
+	addi	r6,r3,-4
+	neg	r0,r3
+	andi.	r0,r0,CACHELINE_MASK	/* # bytes to start of cache line */
+	beq	58f
+
+	cmplw	0,r5,r0			/* is this more than total to do? */
+	blt	63f			/* if not much to do */
+	andi.	r8,r0,3			/* get it word-aligned first */
+	mtctr	r8
+	beq+	61f
+70:	lbz	r9,4(r4)		/* do some bytes */
+71:	stb	r9,4(r6)
+	addi	r4,r4,1
+	addi	r6,r6,1
+	bdnz	70b
+61:	subf	r5,r0,r5
+	srwi.	r0,r0,2
+	mtctr	r0
+	beq	58f
+72:	lwzu	r9,4(r4)		/* do some words */
+73:	stwu	r9,4(r6)
+	bdnz	72b
+
+	.section __ex_table,"a"
+	.align	2
+	.long	70b,100f
+	.long	71b,101f
+	.long	72b,102f
+	.long	73b,103f
+	.text
+
+58:	srwi.	r0,r5,LG_CACHELINE_BYTES /* # complete cachelines */
+	clrlwi	r5,r5,32-LG_CACHELINE_BYTES
+	li	r11,4
+	beq	63f
+
+#ifdef CONFIG_8xx
+	/* Don't use prefetch on 8xx */
+	mtctr	r0
+53:	COPY_16_BYTES_WITHEX(0)
+	bdnz	53b
+
+#else /* not CONFIG_8xx */
+	/* Here we decide how far ahead to prefetch the source */
+	li	r3,4
+	cmpwi	r0,1
+	li	r7,0
+	ble	114f
+	li	r7,1
+#if MAX_COPY_PREFETCH > 1
+	/* Heuristically, for large transfers we prefetch
+	   MAX_COPY_PREFETCH cachelines ahead.  For small transfers
+	   we prefetch 1 cacheline ahead. */
+	cmpwi	r0,MAX_COPY_PREFETCH
+	ble	112f
+	li	r7,MAX_COPY_PREFETCH
+112:	mtctr	r7
+111:	dcbt	r3,r4
+	addi	r3,r3,CACHELINE_BYTES
+	bdnz	111b
+#else
+	dcbt	r3,r4
+	addi	r3,r3,CACHELINE_BYTES
+#endif /* MAX_COPY_PREFETCH > 1 */
+
+114:	subf	r8,r7,r0
+	mr	r0,r7
+	mtctr	r8
+
+53:	dcbt	r3,r4
+54:	dcbz	r11,r6
+	.section __ex_table,"a"
+	.align	2
+	.long	54b,105f
+	.text
+/* the main body of the cacheline loop */
+	COPY_16_BYTES_WITHEX(0)
+#if L1_CACHE_LINE_SIZE >= 32
+	COPY_16_BYTES_WITHEX(1)
+#if L1_CACHE_LINE_SIZE >= 64
+	COPY_16_BYTES_WITHEX(2)
+	COPY_16_BYTES_WITHEX(3)
+#if L1_CACHE_LINE_SIZE >= 128
+	COPY_16_BYTES_WITHEX(4)
+	COPY_16_BYTES_WITHEX(5)
+	COPY_16_BYTES_WITHEX(6)
+	COPY_16_BYTES_WITHEX(7)
+#endif
+#endif
+#endif
+	bdnz	53b
+	cmpwi	r0,0
+	li	r3,4
+	li	r7,0
+	bne	114b
+#endif /* CONFIG_8xx */
+
+63:	srwi.	r0,r5,2
+	mtctr	r0
+	beq	64f
+30:	lwzu	r0,4(r4)
+31:	stwu	r0,4(r6)
+	bdnz	30b
+
+64:	andi.	r0,r5,3
+	mtctr	r0
+	beq+	65f
+40:	lbz	r0,4(r4)
+41:	stb	r0,4(r6)
+	addi	r4,r4,1
+	addi	r6,r6,1
+	bdnz	40b
+65:	li	r3,0
+	blr
+
+/* read fault, initial single-byte copy */
+100:	li	r9,0
+	b	90f
+/* write fault, initial single-byte copy */
+101:	li	r9,1
+90:	subf	r5,r8,r5
+	li	r3,0
+	b	99f
+/* read fault, initial word copy */
+102:	li	r9,0
+	b	91f
+/* write fault, initial word copy */
+103:	li	r9,1
+91:	li	r3,2
+	b	99f
+
+/*
+ * this stuff handles faults in the cacheline loop and branches to either
+ * 104f (if in read part) or 105f (if in write part), after updating r5
+ */
+	COPY_16_BYTES_EXCODE(0)
+#if L1_CACHE_LINE_SIZE >= 32
+	COPY_16_BYTES_EXCODE(1)
+#if L1_CACHE_LINE_SIZE >= 64
+	COPY_16_BYTES_EXCODE(2)
+	COPY_16_BYTES_EXCODE(3)
+#if L1_CACHE_LINE_SIZE >= 128
+	COPY_16_BYTES_EXCODE(4)
+	COPY_16_BYTES_EXCODE(5)
+	COPY_16_BYTES_EXCODE(6)
+	COPY_16_BYTES_EXCODE(7)
+#endif
+#endif
+#endif
+
+/* read fault in cacheline loop */
+104:	li	r9,0
+	b	92f
+/* fault on dcbz (effectively a write fault) */
+/* or write fault in cacheline loop */
+105:	li	r9,1
+92:	li	r3,LG_CACHELINE_BYTES
+	b	99f
+/* read fault in final word loop */
+108:	li	r9,0
+	b	93f
+/* write fault in final word loop */
+109:	li	r9,1
+93:	andi.	r5,r5,3
+	li	r3,2
+	b	99f
+/* read fault in final byte loop */
+110:	li	r9,0
+	b	94f
+/* write fault in final byte loop */
+111:	li	r9,1
+94:	li	r5,0
+	li	r3,0
+/*
+ * At this stage the number of bytes not copied is
+ * r5 + (ctr << r3), and r9 is 0 for read or 1 for write.
+ */
+99:	mfctr	r0
+	slw	r3,r0,r3
+	add.	r3,r3,r5
+	beq	120f			/* shouldn't happen */
+	cmpwi	0,r9,0
+	bne	120f
+/* for a read fault, first try to continue the copy one byte at a time */
+	mtctr	r3
+130:	lbz	r0,4(r4)
+131:	stb	r0,4(r6)
+	addi	r4,r4,1
+	addi	r6,r6,1
+	bdnz	130b
+/* then clear out the destination: r3 bytes starting at 4(r6) */
+132:	mfctr	r3
+	srwi.	r0,r3,2
+	li	r9,0
+	mtctr	r0
+	beq	113f
+112:	stwu	r9,4(r6)
+	bdnz	112b
+113:	andi.	r0,r3,3
+	mtctr	r0
+	beq	120f
+114:	stb	r9,4(r6)
+	addi	r6,r6,1
+	bdnz	114b
+120:	blr
+
+	.section __ex_table,"a"
+	.align	2
+	.long	30b,108b
+	.long	31b,109b
+	.long	40b,110b
+	.long	41b,111b
+	.long	130b,132b
+	.long	131b,120b
+	.long	112b,120b
+	.long	114b,120b
+	.text
+
+_GLOBAL(__clear_user)
+	addi	r6,r3,-4
+	li	r3,0
+	li	r5,0
+	cmplwi	0,r4,4
+	blt	7f
+	/* clear a single word */
+11:	stwu	r5,4(r6)
+	beqlr
+	/* clear word sized chunks */
+	andi.	r0,r6,3
+	add	r4,r0,r4
+	subf	r6,r0,r6
+	srwi	r0,r4,2
+	andi.	r4,r4,3
+	mtctr	r0
+	bdz	7f
+1:	stwu	r5,4(r6)
+	bdnz	1b
+	/* clear byte sized chunks */
+7:	cmpwi	0,r4,0
+	beqlr
+	mtctr	r4
+	addi	r6,r6,3
+8:	stbu	r5,1(r6)
+	bdnz	8b
+	blr
+90:	mr	r3,r4
+	blr
+91:	mfctr	r3
+	slwi	r3,r3,2
+	add	r3,r3,r4
+	blr
+92:	mfctr	r3
+	blr
+
+	.section __ex_table,"a"
+	.align	2
+	.long	11b,90b
+	.long	1b,91b
+	.long	8b,92b
+	.text
+
+_GLOBAL(__strncpy_from_user)
+	addi	r6,r3,-1
+	addi	r4,r4,-1
+	cmpwi	0,r5,0
+	beq	2f
+	mtctr	r5
+1:	lbzu	r0,1(r4)
+	cmpwi	0,r0,0
+	stbu	r0,1(r6)
+	bdnzf	2,1b		/* dec ctr, branch if ctr != 0 && !cr0.eq */
+	beq	3f
+2:	addi	r6,r6,1
+3:	subf	r3,r3,r6
+	blr
+99:	li	r3,-EFAULT
+	blr
+
+	.section __ex_table,"a"
+	.align	2
+	.long	1b,99b
+	.text
+
+/* r3 = str, r4 = len (> 0), r5 = top (highest addr) */
+_GLOBAL(__strnlen_user)
+	addi	r7,r3,-1
+	subf	r6,r7,r5	/* top+1 - str */
+	cmplw	0,r4,r6
+	bge	0f
+	mr	r6,r4
+0:	mtctr	r6		/* ctr = min(len, top - str) */
+1:	lbzu	r0,1(r7)	/* get next byte */
+	cmpwi	0,r0,0
+	bdnzf	2,1b		/* loop if --ctr != 0 && byte != 0 */
+	addi	r7,r7,1
+	subf	r3,r3,r7	/* number of bytes we have looked at */
+	beqlr			/* return if we found a 0 byte */
+	cmpw	0,r3,r4		/* did we look at all len bytes? */
+	blt	99f		/* if not, must have hit top */
+	addi	r3,r4,1		/* return len + 1 to indicate no null found */
+	blr
+99:	li	r3,0		/* bad address, return 0 */
+	blr
+
+	.section __ex_table,"a"
+	.align	2
+	.long	1b,99b
author	Linus Torvalds <torvalds@ppc970.osdl.org>	2005-04-16 15:20:36 -0700
committer	Linus Torvalds <torvalds@ppc970.osdl.org>	2005-04-16 15:20:36 -0700
commit	1da177e4c3f41524e886b7f1b8a0c1fc7321cac2 (patch)
tree	0bba044c4ce775e45a88a51686b5d9f90697ea9d /arch/ppc/lib
download	linux-1da177e4c3f41524e886b7f1b8a0c1fc7321cac2.tar.gz linux-1da177e4c3f41524e886b7f1b8a0c1fc7321cac2.tar.bz2 linux-1da177e4c3f41524e886b7f1b8a0c1fc7321cac2.zip