--- linux-2.6.8.1-t044-driver-update/Documentation/aoe/todo.txt 1970-01-01 03:00:00.000000000 +0300 +++ aoe/Documentation/aoe/todo.txt 2005-09-29 18:30:39.000000000 +0400 @@ -0,0 +1,14 @@ +There is a potential for deadlock when allocating a struct sk_buff for +data that needs to be written out to aoe storage. If the data is +being written from a dirty page in order to free that page, and if +there are no other pages available, then deadlock may occur when a +free page is needed for the sk_buff allocation. This situation has +not been observed, but it would be nice to eliminate any potential for +deadlock under memory pressure. + +Because ATA over Ethernet is not fragmented by the kernel's IP code, +the destructor member of the struct sk_buff is available to the aoe +driver. By using a mempool for allocating all but the first few +sk_buffs, and by registering a destructor, we should be able to +efficiently allocate sk_buffs without introducing any potential for +deadlock. --- linux-2.6.8.1-t044-driver-update/Documentation/aoe/udev-install.sh 1970-01-01 03:00:00.000000000 +0300 +++ aoe/Documentation/aoe/udev-install.sh 2005-09-29 18:30:39.000000000 +0400 @@ -0,0 +1,30 @@ +# install the aoe-specific udev rules from udev.txt into +# the system's udev configuration +# + +me="`basename $0`" + +# find udev.conf, often /etc/udev/udev.conf +# (or environment can specify where to find udev.conf) +# +if test -z "$conf"; then + if test -r /etc/udev/udev.conf; then + conf=/etc/udev/udev.conf + else + conf="`find /etc -type f -name udev.conf 2> /dev/null`" + if test -z "$conf" || test ! -r "$conf"; then + echo "$me Error: no udev.conf found" 1>&2 + exit 1 + fi + fi +fi + +# find the directory where udev rules are stored, often +# /etc/udev/rules.d +# +rules_d="`sed -n '/^udev_rules=/{ s!udev_rules=!!; s!\"!!g; p; }' $conf`" +if test -z "$rules_d" || test ! 
-d "$rules_d"; then + echo "$me Error: cannot find udev rules directory" 1>&2 + exit 1 +fi +sh -xc "cp `dirname $0`/udev.txt $rules_d/60-aoe.rules" --- linux-2.6.8.1-t044-driver-update/Documentation/aoe/status.sh 1970-01-01 03:00:00.000000000 +0300 +++ aoe/Documentation/aoe/status.sh 2005-09-29 18:30:39.000000000 +0400 @@ -0,0 +1,27 @@ +#! /bin/sh +# collate and present sysfs information about AoE storage + +set -e +format="%8s\t%8s\t%8s\n" +me=`basename $0` +sysd=${sysfs_dir:-/sys} + +# printf "$format" device mac netif state + +# Suse 9.1 Pro doesn't put /sys in /etc/mtab +#test -z "`mount | grep sysfs`" && { +test ! -d "$sysd/block" && { + echo "$me Error: sysfs is not mounted" 1>&2 + exit 1 +} + +for d in `ls -d $sysd/block/etherd* 2>/dev/null | grep -v p` end; do + # maybe ls comes up empty, so we use "end" + test $d = end && continue + + dev=`echo "$d" | sed 's/.*!//'` + printf "$format" \ + "$dev" \ + "`cat \"$d/netif\"`" \ + "`cat \"$d/state\"`" +done | sort --- linux-2.6.8.1-t044-driver-update/Documentation/aoe/mkdevs.sh 1970-01-01 03:00:00.000000000 +0300 +++ aoe/Documentation/aoe/mkdevs.sh 2005-09-29 18:30:39.000000000 +0400 @@ -0,0 +1,37 @@ +#!/bin/sh + +n_shelves=${n_shelves:-10} +n_partitions=${n_partitions:-16} + +if test "$#" != "1"; then + echo "Usage: sh `basename $0` {dir}" 1>&2 + echo " n_partitions=16 sh `basename $0` {dir}" 1>&2 + exit 1 +fi +dir=$1 + +MAJOR=152 + +echo "Creating AoE devnode files in $dir ..." + +set -e + +mkdir -p $dir + +# (Status info is in sysfs. See status.sh.) 
+# rm -f $dir/stat +# mknod -m 0400 $dir/stat c $MAJOR 1 +rm -f $dir/err +mknod -m 0400 $dir/err c $MAJOR 2 +rm -f $dir/discover +mknod -m 0200 $dir/discover c $MAJOR 3 +rm -f $dir/interfaces +mknod -m 0200 $dir/interfaces c $MAJOR 4 + +export n_partitions +mkshelf=`echo $0 | sed 's!mkdevs!mkshelf!'` +i=0 +while test $i -lt $n_shelves; do + sh -xc "sh $mkshelf $dir $i" + i=`expr $i + 1` +done --- linux-2.6.8.1-t044-driver-update/Documentation/aoe/mkshelf.sh 1970-01-01 03:00:00.000000000 +0300 +++ aoe/Documentation/aoe/mkshelf.sh 2005-09-29 18:30:39.000000000 +0400 @@ -0,0 +1,28 @@ +#! /bin/sh + +if test "$#" != "2"; then + echo "Usage: sh `basename $0` {dir} {shelfaddress}" 1>&2 + echo " n_partitions=16 sh `basename $0` {dir} {shelfaddress}" 1>&2 + exit 1 +fi +n_partitions=${n_partitions:-16} +dir=$1 +shelf=$2 +nslots=16 +maxslot=`echo $nslots 1 - p | dc` +MAJOR=152 + +set -e + +minor=`echo $nslots \* $shelf \* $n_partitions | bc` +endp=`echo $n_partitions - 1 | bc` +for slot in `seq 0 $maxslot`; do + for part in `seq 0 $endp`; do + name=e$shelf.$slot + test "$part" != "0" && name=${name}p$part + rm -f $dir/$name + mknod -m 0660 $dir/$name b $MAJOR $minor + + minor=`expr $minor + 1` + done +done --- linux-2.6.8.1-t044-driver-update/Documentation/aoe/aoe.txt 1970-01-01 03:00:00.000000000 +0300 +++ aoe/Documentation/aoe/aoe.txt 2005-09-29 18:30:39.000000000 +0400 @@ -0,0 +1,123 @@ +The EtherDrive (R) HOWTO for users of 2.6 kernels is found at ... + + http://www.coraid.com/support/linux/EtherDrive-2.6-HOWTO.html + + It has many tips and hints! + +The aoetools are userland programs that are designed to work with this +driver. The aoetools are on sourceforge. + + http://aoetools.sourceforge.net/ + +The scripts in this Documentation/aoe directory are intended to +document the use of the driver and are not necessary if you install +the aoetools. 
+ + +CREATING DEVICE NODES + + Users of udev should find the block device nodes created + automatically, but to create all the necessary device nodes, use the + udev configuration rules provided in udev.txt (in this directory). + + There is a udev-install.sh script that shows how to install these + rules on your system. + + If you are not using udev, two scripts are provided in + Documentation/aoe as examples of static device node creation for + using the aoe driver. + + rm -rf /dev/etherd + sh Documentation/aoe/mkdevs.sh /dev/etherd + + ... or to make just one shelf's worth of block device nodes ... + + sh Documentation/aoe/mkshelf.sh /dev/etherd 0 + + There is also an autoload script that shows how to edit + /etc/modprobe.conf to ensure that the aoe module is loaded when + necessary. + +USING DEVICE NODES + + "cat /dev/etherd/err" blocks, waiting for error diagnostic output, + like any retransmitted packets. + + "echo eth2 eth4 > /dev/etherd/interfaces" tells the aoe driver to + limit ATA over Ethernet traffic to eth2 and eth4. AoE traffic from + untrusted networks should be ignored as a matter of security. See + also the aoe_iflist driver option described below. + + "echo > /dev/etherd/discover" tells the driver to find out what AoE + devices are available. + + These character devices may disappear and be replaced by sysfs + counterparts. Using the commands in aoetools insulates users from + these implementation details. + + The block devices are named like this: + + e{shelf}.{slot} + e{shelf}.{slot}p{part} + + ... so that "e0.2" is the third blade from the left (slot 2) in the + first shelf (shelf address zero). That's the whole disk. The first + partition on that disk would be "e0.2p1". + +USING SYSFS + + Each aoe block device in /sys/block has the extra attributes of + state, mac, and netif. The state attribute is "up" when the device + is ready for I/O and "down" if detected but unusable. 
The + "down,closewait" state shows that the device is still open and + cannot come up again until it has been closed. + + The mac attribute is the ethernet address of the remote AoE device. + The netif attribute is the network interface on the localhost + through which we are communicating with the remote AoE device. + + There is a script in this directory that formats this information + in a convenient way. Users with aoetools can use the aoe-stat + command. + + root@makki root# sh Documentation/aoe/status.sh + e10.0 eth3 up + e10.1 eth3 up + e10.2 eth3 up + e10.3 eth3 up + e10.4 eth3 up + e10.5 eth3 up + e10.6 eth3 up + e10.7 eth3 up + e10.8 eth3 up + e10.9 eth3 up + e4.0 eth1 up + e4.1 eth1 up + e4.2 eth1 up + e4.3 eth1 up + e4.4 eth1 up + e4.5 eth1 up + e4.6 eth1 up + e4.7 eth1 up + e4.8 eth1 up + e4.9 eth1 up + + Use /sys/module/aoe/parameters/aoe_iflist (or better, the driver + option discussed below) instead of /dev/etherd/interfaces to limit + AoE traffic to the network interfaces in the given + whitespace-separated list. Unlike the old character device, the + sysfs entry can be read from as well as written to. + + It's helpful to trigger discovery after setting the list of allowed + interfaces. The aoetools package provides an aoe-discover script + for this purpose. You can also directly use the + /dev/etherd/discover special file described above. + +DRIVER OPTIONS + + There is a boot option for the built-in aoe driver and a + corresponding module parameter, aoe_iflist. Without this option, + all network interfaces may be used for ATA over Ethernet. Here is a + usage example for the module parameter. + + modprobe aoe aoe_iflist="eth1 eth3" --- linux-2.6.8.1-t044-driver-update/Documentation/aoe/udev.txt 1970-01-01 03:00:00.000000000 +0300 +++ aoe/Documentation/aoe/udev.txt 2005-09-29 18:30:39.000000000 +0400 @@ -0,0 +1,23 @@ +# These rules tell udev what device nodes to create for aoe support. 
+# They may be installed along the following lines (adjusted to what +# you see on your system). +# +# ecashin@makki ~$ su +# Password: +# bash# find /etc -type f -name udev.conf +# /etc/udev/udev.conf +# bash# grep udev_rules= /etc/udev/udev.conf +# udev_rules="/etc/udev/rules.d/" +# bash# ls /etc/udev/rules.d/ +# 10-wacom.rules 50-udev.rules +# bash# cp /path/to/linux-2.6.xx/Documentation/aoe/udev.txt \ +# /etc/udev/rules.d/60-aoe.rules +# + +# aoe char devices +SUBSYSTEM="aoe", KERNEL="discover", NAME="etherd/%k", GROUP="disk", MODE="0220" +SUBSYSTEM="aoe", KERNEL="err", NAME="etherd/%k", GROUP="disk", MODE="0440" +SUBSYSTEM="aoe", KERNEL="interfaces", NAME="etherd/%k", GROUP="disk", MODE="0220" + +# aoe block devices +KERNEL="etherd*", NAME="%k", GROUP="disk" --- linux-2.6.8.1-t044-driver-update/Documentation/aoe/autoload.sh 1970-01-01 03:00:00.000000000 +0300 +++ aoe/Documentation/aoe/autoload.sh 2005-09-29 18:30:39.000000000 +0400 @@ -0,0 +1,17 @@ +#!/bin/sh +# set aoe to autoload by installing the +# aliases in /etc/modprobe.conf + +f=/etc/modprobe.conf + +if test ! -r $f || test ! -w $f; then + echo "cannot configure $f for module autoloading" 1>&2 + exit 1 +fi + +grep major-152 $f >/dev/null +if [ $? = 1 ]; then + echo alias block-major-152 aoe >> $f + echo alias char-major-152 aoe >> $f +fi + --- linux-2.6.8.1-t044-driver-update/drivers/block/aoe/aoemain.c 1970-01-01 03:00:00.000000000 +0300 +++ aoe/drivers/block/aoe/aoemain.c 2005-09-29 18:30:40.000000000 +0400 @@ -0,0 +1,136 @@ +/* Copyright (c) 2004 Coraid, Inc. See COPYING for GPL terms. 
*/ +/* + * aoemain.c + * Module initialization routines, discover timer + */ + +#include +#include +#include +#include +#include +#include "aoe.h" + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Sam Hopkins "); +MODULE_DESCRIPTION("AoE block/char driver for 2.6.[0-9]+"); +/* this creates warning "Warning: could not find versions for .tmp_versions/aoe.mod" +MODULE_VERSION(VERSION); +*/ + +/* modinfo sometimes works, but this will be in sysfs */ +static char version[] = VERSION; +module_param_string(version, version, sizeof version, 0400); +MODULE_PARM_DESC(version, "aoe module version " VERSION "\n"); + +enum { TINIT, TRUN, TKILL }; + +/* copied from mm/slab.c for backwards compatibility */ +void * +aoe_kcalloc(size_t n, size_t size, int flags) +{ + void *ret = NULL; + + if (n != 0 && size > INT_MAX / n) + return ret; + + ret = kmalloc(n * size, flags); + if (ret) + memset(ret, 0, n * size); + return ret; +} + +static void +discover_timer(ulong vp) +{ + static struct timer_list t; + static volatile ulong die; + static spinlock_t lock; + ulong flags; + enum { DTIMERTICK = HZ * 60 }; /* one minute */ + + switch (vp) { + case TINIT: + init_timer(&t); + spin_lock_init(&lock); + t.data = TRUN; + t.function = discover_timer; + die = 0; + case TRUN: + spin_lock_irqsave(&lock, flags); + if (!die) { + t.expires = jiffies + DTIMERTICK; + add_timer(&t); + } + spin_unlock_irqrestore(&lock, flags); + + aoecmd_cfg(0xffff, 0xff); + return; + case TKILL: + spin_lock_irqsave(&lock, flags); + die = 1; + spin_unlock_irqrestore(&lock, flags); + + del_timer_sync(&t); + default: + return; + } +} + +static void +aoe_exit(void) +{ + discover_timer(TKILL); + + aoenet_exit(); + unregister_blkdev(AOE_MAJOR, DEVICE_NAME); + aoechr_exit(); + aoedev_exit(); + aoeblk_exit(); /* free cache after de-allocating bufs */ +} + +static int __init +aoe_init(void) +{ + int ret; + + ret = aoedev_init(); + if (ret) + return ret; + ret = aoechr_init(); + if (ret) + goto chr_fail; + ret = aoeblk_init(); + if (ret) 
+ goto blk_fail; + ret = aoenet_init(); + if (ret) + goto net_fail; + ret = register_blkdev(AOE_MAJOR, DEVICE_NAME); + if (ret < 0) { + printk(KERN_ERR "aoe: aoeblk_init: can't register major\n"); + goto blkreg_fail; + } + + printk(KERN_INFO + "aoe: aoe_init: AoE v2.6-%s initialised.\n", + VERSION); + discover_timer(TINIT); + return 0; + + blkreg_fail: + aoenet_exit(); + net_fail: + aoeblk_exit(); + blk_fail: + aoechr_exit(); + chr_fail: + aoedev_exit(); + + printk(KERN_INFO "aoe: aoe_init: initialisation failure.\n"); + return ret; +} + +module_init(aoe_init); +module_exit(aoe_exit); + --- linux-2.6.8.1-t044-driver-update/drivers/block/aoe/aoecmd.c 1970-01-01 03:00:00.000000000 +0300 +++ aoe/drivers/block/aoe/aoecmd.c 2005-09-29 18:30:40.000000000 +0400 @@ -0,0 +1,652 @@ +/* Copyright (c) 2004 Coraid, Inc. See COPYING for GPL terms. */ +/* + * aoecmd.c + * Filesystem request handling methods + */ + +#include +#include +#include +#include +#include +#include "aoe.h" + +#define TIMERTICK (HZ / 10) +#define MINTIMER (2 * TIMERTICK) +#define MAXTIMER (HZ << 1) +#define MAXWAIT (60 * 3) /* After MAXWAIT seconds, give up and fail dev */ + +static struct sk_buff * +new_skb(struct net_device *if_dev, ulong len) +{ + struct sk_buff *skb; + + if (len < ETH_ZLEN) + len = ETH_ZLEN; + + skb = alloc_skb(len, GFP_ATOMIC); + if (skb) { + skb->nh.raw = skb->mac.raw = skb->data; + skb->dev = if_dev; + skb->protocol = __constant_htons(ETH_P_AOE); + skb->priority = 0; + skb_put(skb, len); + memset(skb->head, 0, len); + skb->next = skb->prev = NULL; + + /* tell the network layer not to perform IP checksums + * or to get the NIC to do it + */ + skb->ip_summed = CHECKSUM_NONE; + } + return skb; +} + +static struct sk_buff * +skb_prepare(struct aoedev *d, struct frame *f) +{ + struct sk_buff *skb; + char *p; + + skb = new_skb(d->ifp, f->ndata + f->writedatalen); + if (!skb) { + printk(KERN_INFO "aoe: skb_prepare: failure to allocate skb\n"); + return NULL; + } + + p = skb->mac.raw; + 
memcpy(p, f->data, f->ndata); + + if (f->writedatalen) { + p += sizeof(struct aoe_hdr) + sizeof(struct aoe_atahdr); + memcpy(p, f->bufaddr, f->writedatalen); + } + + return skb; +} +/* Linear search of d->frames for the frame whose tag equals `tag`; returns NULL when none matches (pass FREETAG to find an unused frame). */ +static struct frame * +getframe(struct aoedev *d, int tag) +{ + struct frame *f, *e; + + f = d->frames; + e = f + d->nframes; + for (; f<e; f++) + if (f->tag == tag) + return f; + return NULL; +} + +/* + * Leave the top bit clear so we have tagspace for userland. + * The bottom 16 bits are the xmit tick for rexmit/rttavg processing. + * This driver reserves tag -1 to mean "unused frame." + */ +static int +newtag(struct aoedev *d) +{ + register ulong n; + + n = jiffies & 0xffff; + return n |= (++d->lasttag & 0x7fff) << 16; +} + +static int +aoehdr_atainit(struct aoedev *d, struct aoe_hdr *h) +{ + u32 host_tag = newtag(d); + + memcpy(h->src, d->ifp->dev_addr, sizeof h->src); + memcpy(h->dst, d->addr, sizeof h->dst); + h->type = __constant_cpu_to_be16(ETH_P_AOE); + h->verfl = AOE_HVER; + h->major = cpu_to_be16(d->aoemajor); + h->minor = d->aoeminor; + h->cmd = AOECMD_ATA; + h->tag = cpu_to_be32(host_tag); + + return host_tag; +} + +static void +aoecmd_ata_rw(struct aoedev *d, struct frame *f) +{ + struct aoe_hdr *h; + struct aoe_atahdr *ah; + struct buf *buf; + struct sk_buff *skb; + ulong bcnt; + register sector_t sector; + char writebit, extbit; + + writebit = 0x10; + extbit = 0x4; + + buf = d->inprocess; + + sector = buf->sector; + bcnt = buf->bv_resid; + if (bcnt > MAXATADATA) + bcnt = MAXATADATA; + + /* initialize the headers & frame */ + h = (struct aoe_hdr *) f->data; + ah = (struct aoe_atahdr *) (h+1); + f->ndata = sizeof *h + sizeof *ah; + memset(h, 0, f->ndata); + f->tag = aoehdr_atainit(d, h); + f->waited = 0; + f->buf = buf; + f->bufaddr = buf->bufaddr; + + /* set up ata header */ + ah->scnt = bcnt >> 9; + ah->lba0 = sector; + ah->lba1 = sector >>= 8; + ah->lba2 = sector >>= 8; + ah->lba3 = sector >>= 8; + if (d->flags & DEVFL_EXT) { + ah->aflags |= AOEAFL_EXT; + ah->lba4 = sector >>= 8; + 
ah->lba5 = sector >>= 8; + } else { + extbit = 0; + ah->lba3 &= 0x0f; + ah->lba3 |= 0xe0; /* LBA bit + obsolete 0xa0 */ + } + + if (bio_data_dir(buf->bio) == WRITE) { + ah->aflags |= AOEAFL_WRITE; + f->writedatalen = bcnt; + } else { + writebit = 0; + f->writedatalen = 0; + } + + ah->cmdstat = WIN_READ | writebit | extbit; + + /* mark all tracking fields and load out */ + buf->nframesout += 1; + buf->bufaddr += bcnt; + buf->bv_resid -= bcnt; +/* printk(KERN_INFO "aoe: bv_resid=%ld\n", buf->bv_resid); */ + buf->resid -= bcnt; + buf->sector += bcnt >> 9; + if (buf->resid == 0) { + d->inprocess = NULL; + } else if (buf->bv_resid == 0) { + buf->bv++; + buf->bv_resid = buf->bv->bv_len; + buf->bufaddr = page_address(buf->bv->bv_page) + buf->bv->bv_offset; + } + + skb = skb_prepare(d, f); + if (skb) { + skb->next = NULL; + if (d->sendq_hd) + d->sendq_tl->next = skb; + else + d->sendq_hd = skb; + d->sendq_tl = skb; + } +} + +/* enters with d->lock held */ +void +aoecmd_work(struct aoedev *d) +{ + struct frame *f; + struct buf *buf; +loop: + f = getframe(d, FREETAG); + if (f == NULL) + return; + if (d->inprocess == NULL) { + if (list_empty(&d->bufq)) + return; + buf = container_of(d->bufq.next, struct buf, bufs); + list_del(d->bufq.next); +/*printk(KERN_INFO "aoecmd_work: bi_size=%ld\n", buf->bio->bi_size); */ + d->inprocess = buf; + } + aoecmd_ata_rw(d, f); + goto loop; +} + +static void +rexmit(struct aoedev *d, struct frame *f) +{ + struct sk_buff *skb; + struct aoe_hdr *h; + char buf[128]; + u32 n; + + n = newtag(d); + + snprintf(buf, sizeof buf, + "%15s e%ld.%ld oldtag=%08x@%08lx newtag=%08x\n", + "retransmit", + d->aoemajor, d->aoeminor, f->tag, jiffies, n); + aoechr_error(buf); + + h = (struct aoe_hdr *) f->data; + f->tag = n; + h->tag = cpu_to_be32(n); + + skb = skb_prepare(d, f); + if (skb) { + skb->next = NULL; + if (d->sendq_hd) + d->sendq_tl->next = skb; + else + d->sendq_hd = skb; + d->sendq_tl = skb; + } +} + +static int +tsince(int tag) +{ + int n; + + n = 
jiffies & 0xffff; + n -= tag & 0xffff; + if (n < 0) + n += 1<<16; + return n; +} +/* Per-device timer callback (vp is the struct aoedev *): retransmits frames outstanding for at least the timeout, downs the device once a frame's accumulated wait exceeds MAXWAIT seconds, then re-arms itself for TIMERTICK. */ +static void +rexmit_timer(ulong vp) +{ + struct aoedev *d; + struct frame *f, *e; + struct sk_buff *sl; + register long timeout; + ulong flags, n; + + d = (struct aoedev *) vp; + sl = NULL; + + /* timeout is always ~150% of the moving average */ + timeout = d->rttavg; + timeout += timeout >> 1; + + spin_lock_irqsave(&d->lock, flags); + + if (d->flags & DEVFL_TKILL) { +tdie: spin_unlock_irqrestore(&d->lock, flags); + return; + } + f = d->frames; + e = f + d->nframes; + for (; f<e; f++) { + if (f->tag != FREETAG && tsince(f->tag) >= timeout) { + n = f->waited += timeout; + n /= HZ; + if (n > MAXWAIT) { /* waited too long. device failure. */ + aoedev_downdev(d); + goto tdie; + } + rexmit(d, f); + } + } + + sl = d->sendq_hd; + d->sendq_hd = d->sendq_tl = NULL; + if (sl) { + n = d->rttavg <<= 1; + if (n > MAXTIMER) + d->rttavg = MAXTIMER; + } + + d->timer.expires = jiffies + TIMERTICK; + add_timer(&d->timer); + + spin_unlock_irqrestore(&d->lock, flags); + + aoenet_xmit(sl); +} + +static void +ataid_complete(struct aoedev *d, unsigned char *id) +{ + u64 ssize; + u16 n; + + /* word 83: command set supported */ + n = le16_to_cpu(get_unaligned((u16 *) &id[83<<1])); + + /* word 86: command set/feature enabled */ + n |= le16_to_cpu(get_unaligned((u16 *) &id[86<<1])); + + if (n & (1<<10)) { /* bit 10: LBA 48 */ + d->flags |= DEVFL_EXT; + + /* word 100: number lba48 sectors */ + ssize = le64_to_cpu(get_unaligned((u64 *) &id[100<<1])); + + /* set as in ide-disk.c:init_idedisk_capacity */ + d->geo.cylinders = ssize; + d->geo.cylinders /= (255 * 63); + d->geo.heads = 255; + d->geo.sectors = 63; + } else { + d->flags &= ~DEVFL_EXT; + + /* number lba28 sectors */ + ssize = le32_to_cpu(get_unaligned((u32 *) &id[60<<1])); + + /* NOTE: obsolete in ATA 6 */ + d->geo.cylinders = le16_to_cpu(get_unaligned((u16 *) &id[54<<1])); + d->geo.heads = le16_to_cpu(get_unaligned((u16 *) &id[55<<1])); + d->geo.sectors = 
le16_to_cpu(get_unaligned((u16 *) &id[56<<1])); + } + d->ssize = ssize; + d->geo.start = 0; + if (d->gd != NULL) { + d->gd->capacity = ssize; + d->flags |= DEVFL_UP; + return; + } + if (d->flags & DEVFL_WORKON) { + printk(KERN_INFO "aoe: ataid_complete: can't schedule work, it's already on! " + "(This really shouldn't happen).\n"); + return; + } + INIT_WORK(&d->work, aoeblk_gdalloc, d); + schedule_work(&d->work); + d->flags |= DEVFL_WORKON; +} + +static void +calc_rttavg(struct aoedev *d, int rtt) +{ + register long n; + + n = rtt; + if (n < MINTIMER) + n = MINTIMER; + else if (n > MAXTIMER) + n = MAXTIMER; + + /* g == .25; cf. Congestion Avoidance and Control, Jacobson & Karels; 1988 */ + n -= d->rttavg; + d->rttavg += n >> 2; +} + +void +aoecmd_ata_rsp(struct sk_buff *skb) +{ + struct aoedev *d; + struct aoe_hdr *hin; + struct aoe_atahdr *ahin, *ahout; + struct frame *f; + struct buf *buf; + struct sk_buff *sl; + register long n; + ulong flags; + char ebuf[128]; + u16 aoemajor; + + hin = (struct aoe_hdr *) skb->mac.raw; + aoemajor = be16_to_cpu(hin->major); + d = aoedev_by_aoeaddr(aoemajor, hin->minor); + if (d == NULL) { + snprintf(ebuf, sizeof ebuf, "aoecmd_ata_rsp: ata response " + "for unknown device %d.%d\n", + aoemajor, hin->minor); + aoechr_error(ebuf); + return; + } + + spin_lock_irqsave(&d->lock, flags); + + f = getframe(d, be32_to_cpu(hin->tag)); + if (f == NULL) { + spin_unlock_irqrestore(&d->lock, flags); + snprintf(ebuf, sizeof ebuf, + "%15s e%d.%d tag=%08x@%08lx\n", + "unexpected rsp", + be16_to_cpu(hin->major), + hin->minor, + be32_to_cpu(hin->tag), + jiffies); + aoechr_error(ebuf); + return; + } + + calc_rttavg(d, tsince(f->tag)); + + ahin = (struct aoe_atahdr *) (hin+1); + ahout = (struct aoe_atahdr *) (f->data + sizeof(struct aoe_hdr)); + buf = f->buf; + + if (ahin->cmdstat & 0xa9) { /* these bits cleared on success */ + printk(KERN_CRIT "aoe: aoecmd_ata_rsp: ata error cmd=%2.2Xh " + "stat=%2.2Xh from e%ld.%ld\n", + ahout->cmdstat, 
ahin->cmdstat, + d->aoemajor, d->aoeminor); + if (buf) + buf->flags |= BUFFL_FAIL; + } else { + switch (ahout->cmdstat) { + case WIN_READ: + case WIN_READ_EXT: + n = ahout->scnt << 9; + if (skb->len - sizeof *hin - sizeof *ahin < n) { + printk(KERN_CRIT "aoe: aoecmd_ata_rsp: runt " + "ata data size in read. skb->len=%d\n", + skb->len); + /* fail frame f? just returning will rexmit. */ + spin_unlock_irqrestore(&d->lock, flags); + return; + } + memcpy(f->bufaddr, ahin+1, n); + case WIN_WRITE: + case WIN_WRITE_EXT: + break; + case WIN_IDENTIFY: + if (skb->len - sizeof *hin - sizeof *ahin < 512) { + printk(KERN_INFO "aoe: aoecmd_ata_rsp: runt data size " + "in ataid. skb->len=%d\n", skb->len); + spin_unlock_irqrestore(&d->lock, flags); + return; + } + ataid_complete(d, (char *) (ahin+1)); + /* d->flags |= DEVFL_WC_UPDATE; */ + break; + default: + printk(KERN_INFO "aoe: aoecmd_ata_rsp: unrecognized " + "outbound ata command %2.2Xh for %d.%d\n", + ahout->cmdstat, + be16_to_cpu(hin->major), + hin->minor); + } + } + + if (buf) { + buf->nframesout -= 1; + if (buf->nframesout == 0 && buf->resid == 0) { + unsigned long duration = jiffies - buf->start_time; + unsigned long n_sect = buf->bio->bi_size >> 9; + struct gendisk *disk = d->gd; + + if (bio_data_dir(buf->bio) == WRITE) { + disk_stat_inc(disk, writes); + disk_stat_add(disk, write_ticks, duration); + disk_stat_add(disk, write_sectors, n_sect); + } else { + disk_stat_inc(disk, reads); + disk_stat_add(disk, read_ticks, duration); + disk_stat_add(disk, read_sectors, n_sect); + } + disk_stat_add(disk, io_ticks, duration); + n = (buf->flags & BUFFL_FAIL) ? 
-EIO : 0; + bio_endio(buf->bio, buf->bio->bi_size, n); + mempool_free(buf, d->bufpool); + } + } + + f->buf = NULL; + f->tag = FREETAG; + + aoecmd_work(d); + + sl = d->sendq_hd; + d->sendq_hd = d->sendq_tl = NULL; + + spin_unlock_irqrestore(&d->lock, flags); + + aoenet_xmit(sl); +} + +void +aoecmd_cfg(ushort aoemajor, unsigned char aoeminor) +{ + struct aoe_hdr *h; + struct aoe_cfghdr *ch; + struct sk_buff *skb, *sl; + struct net_device *ifp; + + sl = NULL; + + read_lock(&dev_base_lock); + for (ifp = dev_base; ifp; dev_put(ifp), ifp = ifp->next) { + dev_hold(ifp); + if (!is_aoe_netif(ifp)) + continue; + + skb = new_skb(ifp, sizeof *h + sizeof *ch); + if (skb == NULL) { + printk(KERN_INFO "aoe: aoecmd_cfg: skb alloc failure\n"); + continue; + } + h = (struct aoe_hdr *) skb->mac.raw; + memset(h, 0, sizeof *h + sizeof *ch); + + memset(h->dst, 0xff, sizeof h->dst); + memcpy(h->src, ifp->dev_addr, sizeof h->src); + h->type = __constant_cpu_to_be16(ETH_P_AOE); + h->verfl = AOE_HVER; + h->major = cpu_to_be16(aoemajor); + h->minor = aoeminor; + h->cmd = AOECMD_CFG; + + skb->next = sl; + sl = skb; + } + read_unlock(&dev_base_lock); + + aoenet_xmit(sl); +} + +/* + * Since we only call this in one place (and it only prepares one frame) + * we just return the skb. Usually we'd chain it up to the aoedev sendq. + */ +static struct sk_buff * +aoecmd_ata_id(struct aoedev *d) +{ + struct aoe_hdr *h; + struct aoe_atahdr *ah; + struct frame *f; + struct sk_buff *skb; + + f = getframe(d, FREETAG); + if (f == NULL) { + printk(KERN_CRIT "aoe: aoecmd_ata_id: can't get a frame. 
" + "This shouldn't happen.\n"); + return NULL; + } + + /* initialize the headers & frame */ + h = (struct aoe_hdr *) f->data; + ah = (struct aoe_atahdr *) (h+1); + f->ndata = sizeof *h + sizeof *ah; + memset(h, 0, f->ndata); + f->tag = aoehdr_atainit(d, h); + f->waited = 0; + f->writedatalen = 0; + + /* this message initializes the device, so we reset the rttavg */ + d->rttavg = MAXTIMER; + + /* set up ata header */ + ah->scnt = 1; + ah->cmdstat = WIN_IDENTIFY; + ah->lba3 = 0xa0; + + skb = skb_prepare(d, f); + + /* we now want to start the rexmit tracking */ + d->flags &= ~DEVFL_TKILL; + d->timer.data = (ulong) d; + d->timer.function = rexmit_timer; + d->timer.expires = jiffies + TIMERTICK; + add_timer(&d->timer); + + return skb; +} + +void +aoecmd_cfg_rsp(struct sk_buff *skb) +{ + struct aoedev *d; + struct aoe_hdr *h; + struct aoe_cfghdr *ch; + ulong flags, sysminor, aoemajor; + u16 bufcnt; + struct sk_buff *sl; + enum { MAXFRAMES = 8 }; + + h = (struct aoe_hdr *) skb->mac.raw; + ch = (struct aoe_cfghdr *) (h+1); + + /* + * Enough people have their dip switches set backwards to + * warrant a loud message for this special case. + */ + aoemajor = be16_to_cpu(h->major); + if (aoemajor == 0xfff) { + printk(KERN_CRIT "aoe: aoecmd_cfg_rsp: Warning: shelf " + "address is all ones. 
Check shelf dip switches\n"); + return; + } + + sysminor = SYSMINOR(aoemajor, h->minor); + if (sysminor * AOE_PARTITIONS + AOE_PARTITIONS > MINORMASK) { + printk(KERN_INFO + "aoe: e%ld.%d: minor number too large\n", + aoemajor, (int) h->minor); + return; + } + + bufcnt = be16_to_cpu(ch->bufcnt); + if (bufcnt > MAXFRAMES) /* keep it reasonable */ + bufcnt = MAXFRAMES; + + d = aoedev_set(sysminor, h->src, skb->dev, bufcnt); + if (d == NULL) { + printk(KERN_INFO "aoe: aoecmd_cfg_rsp: device set failure\n"); + return; + } + + spin_lock_irqsave(&d->lock, flags); + + if (d->flags & (DEVFL_UP | DEVFL_CLOSEWAIT)) { + spin_unlock_irqrestore(&d->lock, flags); + return; + } + + d->fw_ver = be16_to_cpu(ch->fwver); + + /* we get here only if the device is new */ + sl = aoecmd_ata_id(d); + + spin_unlock_irqrestore(&d->lock, flags); + + aoenet_xmit(sl); +} + --- linux-2.6.8.1-t044-driver-update/drivers/block/aoe/Makefile 1970-01-01 03:00:00.000000000 +0300 +++ aoe/drivers/block/aoe/Makefile 2005-09-29 18:30:39.000000000 +0400 @@ -0,0 +1,6 @@ +# +# Makefile for ATA over Ethernet +# + +obj-$(CONFIG_ATA_OVER_ETH) += aoe.o +aoe-objs := aoeblk.o aoechr.o aoecmd.o aoedev.o aoemain.o aoenet.o --- linux-2.6.8.1-t044-driver-update/drivers/block/aoe/aoedev.c 1970-01-01 03:00:00.000000000 +0300 +++ aoe/drivers/block/aoe/aoedev.c 2005-09-29 18:30:40.000000000 +0400 @@ -0,0 +1,177 @@ +/* Copyright (c) 2004 Coraid, Inc. See COPYING for GPL terms. */ +/* + * aoedev.c + * AoE device utility functions; maintains device list. 
+ */ + +#include +#include +#include +#include "aoe.h" + +static struct aoedev *devlist; +static spinlock_t devlist_lock; + +struct aoedev * +aoedev_by_aoeaddr(int maj, int min) +{ + struct aoedev *d; + ulong flags; + + spin_lock_irqsave(&devlist_lock, flags); + + for (d=devlist; d; d=d->next) + if (d->aoemajor == maj && d->aoeminor == min) + break; + + spin_unlock_irqrestore(&devlist_lock, flags); + return d; +} + +/* called with devlist lock held; allocates a zeroed aoedev with nframes frames all marked FREETAG, links it at the head of devlist, and returns it (NULL on allocation failure) */ +static struct aoedev * +aoedev_newdev(ulong nframes) +{ + struct aoedev *d; + struct frame *f, *e; + + d = aoe_kcalloc(1, sizeof *d, GFP_ATOMIC); + if (d == NULL) + return NULL; + f = aoe_kcalloc(nframes, sizeof *f, GFP_ATOMIC); + if (f == NULL) { + kfree(d); + return NULL; + } + + d->nframes = nframes; + d->frames = f; + e = f + nframes; + for (; f<e; f++) + f->tag = FREETAG; + + spin_lock_init(&d->lock); + init_timer(&d->timer); + d->bufpool = NULL; /* defer to aoeblk_gdalloc */ + INIT_LIST_HEAD(&d->bufq); + d->next = devlist; + devlist = d; + + return d; +} +/* Take the device down: stop the rexmit timer, free every frame, complete with -EIO each bio that had frames outstanding or was still queued on bufq, zero the disk capacity and clear DEVFL_UP (setting DEVFL_CLOSEWAIT if the device is still open). */ +void +aoedev_downdev(struct aoedev *d) +{ + struct frame *f, *e; + struct buf *buf; + struct bio *bio; + + d->flags |= DEVFL_TKILL; + del_timer(&d->timer); + + f = d->frames; + e = f + d->nframes; + for (; f<e; f->tag = FREETAG, f->buf = NULL, f++) { + if (f->tag == FREETAG || f->buf == NULL) + continue; + buf = f->buf; + bio = buf->bio; + if (--buf->nframesout == 0) { + mempool_free(buf, d->bufpool); + bio_endio(bio, bio->bi_size, -EIO); + } + } + d->inprocess = NULL; + + while (!list_empty(&d->bufq)) { + buf = container_of(d->bufq.next, struct buf, bufs); + list_del(d->bufq.next); + bio = buf->bio; + mempool_free(buf, d->bufpool); + bio_endio(bio, bio->bi_size, -EIO); + } + + if (d->nopen) + d->flags |= DEVFL_CLOSEWAIT; + if (d->gd) + d->gd->capacity = 0; + + d->flags &= ~DEVFL_UP; +} + +struct aoedev * +aoedev_set(ulong sysminor, unsigned char *addr, struct net_device *ifp, ulong bufcnt) +{ + struct aoedev *d; + ulong flags; + + spin_lock_irqsave(&devlist_lock, flags); + + 
for (d=devlist; d; d=d->next) + if (d->sysminor == sysminor) + break; + + if (d == NULL && (d = aoedev_newdev(bufcnt)) == NULL) { + spin_unlock_irqrestore(&devlist_lock, flags); + printk(KERN_INFO "aoe: aoedev_set: aoedev_newdev failure.\n"); + return NULL; + } /* if newdev, (d->flags & DEVFL_UP) == 0 for below */ + + spin_unlock_irqrestore(&devlist_lock, flags); + spin_lock_irqsave(&d->lock, flags); + + d->ifp = ifp; + memcpy(d->addr, addr, sizeof d->addr); + if ((d->flags & DEVFL_UP) == 0) { + aoedev_downdev(d); /* flushes outstanding frames */ + d->sysminor = sysminor; + d->aoemajor = AOEMAJOR(sysminor); + d->aoeminor = AOEMINOR(sysminor); + } + + spin_unlock_irqrestore(&d->lock, flags); + return d; +} + +static void +aoedev_freedev(struct aoedev *d) +{ + if (d->gd) { + aoedisk_rm_sysfs(d); + del_gendisk(d->gd); + put_disk(d->gd); + } + kfree(d->frames); + if (d->bufpool) + mempool_destroy(d->bufpool); + kfree(d); +} + +void +aoedev_exit(void) +{ + struct aoedev *d; + ulong flags; + + flush_scheduled_work(); + + while ((d = devlist)) { + devlist = d->next; + + spin_lock_irqsave(&d->lock, flags); + aoedev_downdev(d); + spin_unlock_irqrestore(&d->lock, flags); + + del_timer_sync(&d->timer); + aoedev_freedev(d); + } +} + +int __init +aoedev_init(void) +{ + spin_lock_init(&devlist_lock); + return 0; +} + --- linux-2.6.8.1-t044-driver-update/drivers/block/aoe/aoenet.c 1970-01-01 03:00:00.000000000 +0300 +++ aoe/drivers/block/aoe/aoenet.c 2005-09-29 18:30:40.000000000 +0400 @@ -0,0 +1,209 @@ +/* Copyright (c) 2004 Coraid, Inc. See COPYING for GPL terms. 
*/ +/* + * aoenet.c + * Ethernet portion of AoE driver + */ + +#include +#include +#include +#include +#include "aoe.h" + +#define NECODES 5 + +static char *aoe_errlist[] = +{ + "no such error", + "unrecognized command code", + "bad argument parameter", + "device unavailable", + "config string present", + "unsupported version" +}; + +enum { + IFLISTSZ = 1024, +}; + +static char aoe_iflist[IFLISTSZ]; +module_param_string(aoe_iflist, aoe_iflist, IFLISTSZ, 0600); +MODULE_PARM_DESC(aoe_iflist, "aoe_iflist=\"dev1 [dev2 ...]\"\n"); + +#ifndef MODULE +static int __init aoe_iflist_setup(char *str) +{ + strncpy(aoe_iflist, str, IFLISTSZ); + aoe_iflist[IFLISTSZ - 1] = '\0'; + return 1; +} + +__setup("aoe_iflist=", aoe_iflist_setup); +#endif + +/* This function is copied here from linux-2.6.10-rc3-bk11/lib/string.c + * for compatibility with FC2, which issues a warning on MODPOST + * about strcspn being undefined. + */ +static size_t +aoe_strcspn(const char *s, const char *reject) +{ + const char *p; + const char *r; + size_t count = 0; + + for (p = s; *p != '\0'; ++p) { + for (r = reject; *r != '\0'; ++r) { + if (*p == *r) + return count; + } + ++count; + } + + return count; +} + +int +is_aoe_netif(struct net_device *ifp) +{ + register char *p, *q; + register int len; + + if (aoe_iflist[0] == '\0') + return 1; + + p = aoe_iflist + strspn(aoe_iflist, WHITESPACE); + for (; *p; p = q + strspn(q, WHITESPACE)) { + q = p + aoe_strcspn(p, WHITESPACE); + if (q != p) + len = q - p; + else + len = strlen(p); /* last token in aoe_iflist */ + + if (strlen(ifp->name) == len && !strncmp(ifp->name, p, len)) + return 1; + if (q == p) + break; + } + + return 0; +} + +int +set_aoe_iflist(const char __user *user_str, size_t size) +{ + if (size >= IFLISTSZ) + return -EINVAL; + + if (copy_from_user(aoe_iflist, user_str, size)) { + printk(KERN_INFO "aoe: %s: copy from user failed\n", __FUNCTION__); + return -EFAULT; + } + aoe_iflist[size] = 0x00; + return 0; +} + +u64 +mac_addr(char addr[6]) +{ + 
u64 n = 0; + char *p = (char *) &n; + + memcpy(p + 2, addr, 6); /* (sizeof addr != 6) */ + + return __be64_to_cpu(n); +} + +static struct sk_buff * +skb_check(struct sk_buff *skb) +{ + if (skb_is_nonlinear(skb)) + if ((skb = skb_share_check(skb, GFP_ATOMIC))) + if (skb_linearize(skb, GFP_ATOMIC) < 0) { + dev_kfree_skb(skb); + return NULL; + } + return skb; +} + +void +aoenet_xmit(struct sk_buff *sl) +{ + struct sk_buff *skb; + + while ((skb = sl)) { + sl = sl->next; + skb->next = skb->prev = NULL; + dev_queue_xmit(skb); + } +} + +/* + * (1) len doesn't include the header by default. I want this. + */ +static int +aoenet_rcv(struct sk_buff *skb, struct net_device *ifp, struct packet_type *pt) +{ + struct aoe_hdr *h; + u32 n; + + skb = skb_check(skb); + if (!skb) + return 0; + + if (!is_aoe_netif(ifp)) + goto exit; + + //skb->len += ETH_HLEN; /* (1) */ + skb_push(skb, ETH_HLEN); /* (1) */ + + h = (struct aoe_hdr *) skb->mac.raw; + n = be32_to_cpu(h->tag); + if ((h->verfl & AOEFL_RSP) == 0 || (n & 1<<31)) + goto exit; + + if (h->verfl & AOEFL_ERR) { + n = h->err; + if (n > NECODES) + n = 0; + if (net_ratelimit()) + printk(KERN_ERR "aoe: aoenet_rcv: error packet from %d.%d; " + "ecode=%d '%s'\n", + be16_to_cpu(h->major), h->minor, + h->err, aoe_errlist[n]); + goto exit; + } + + switch (h->cmd) { + case AOECMD_ATA: + aoecmd_ata_rsp(skb); + break; + case AOECMD_CFG: + aoecmd_cfg_rsp(skb); + break; + default: + printk(KERN_INFO "aoe: aoenet_rcv: unknown cmd %d\n", h->cmd); + } +exit: + dev_kfree_skb(skb); + return 0; +} + +static struct packet_type aoe_pt = { + .type = __constant_htons(ETH_P_AOE), + .func = aoenet_rcv, +}; + +int __init +aoenet_init(void) +{ + dev_add_pack(&aoe_pt); + return 0; +} + +void +aoenet_exit(void) +{ + dev_remove_pack(&aoe_pt); +} + --- linux-2.6.8.1-t044-driver-update/drivers/block/aoe/aoe.h 1970-01-01 03:00:00.000000000 +0300 +++ aoe/drivers/block/aoe/aoe.h 2005-09-29 18:30:39.000000000 +0400 @@ -0,0 +1,177 @@ +/* Copyright (c) 2004 Coraid, 
Inc. See COPYING for GPL terms. */ +#define VERSION "14" +#define AOE_MAJOR 152 +#define DEVICE_NAME "aoe" + +/* AOE_PARTITIONS is set in the Makefile */ + +#define SYSMINOR(aoemajor, aoeminor) ((aoemajor) * NPERSHELF + (aoeminor)) +#define AOEMAJOR(sysminor) ((sysminor) / NPERSHELF) +#define AOEMINOR(sysminor) ((sysminor) % NPERSHELF) +#define WHITESPACE " \t\v\f\n" +/* for compatibility, so that this driver builds for kernels with + * or without AoE already in them + */ +#ifndef ETH_P_AOE +#define ETH_P_AOE 0x88a2 +#endif + +enum { + AOECMD_ATA, + AOECMD_CFG, + + AOEFL_RSP = (1<<3), + AOEFL_ERR = (1<<2), + + AOEAFL_EXT = (1<<6), + AOEAFL_DEV = (1<<4), + AOEAFL_ASYNC = (1<<1), + AOEAFL_WRITE = (1<<0), + + AOECCMD_READ = 0, + AOECCMD_TEST, + AOECCMD_PTEST, + AOECCMD_SET, + AOECCMD_FSET, + + AOE_HVER = 0x10, +}; + +struct aoe_hdr { + unsigned char dst[6]; + unsigned char src[6]; + u16 type; + unsigned char verfl; + unsigned char err; + u16 major; + unsigned char minor; + unsigned char cmd; + u32 tag; +}; + +struct aoe_atahdr { + unsigned char aflags; + unsigned char errfeat; + unsigned char scnt; + unsigned char cmdstat; + unsigned char lba0; + unsigned char lba1; + unsigned char lba2; + unsigned char lba3; + unsigned char lba4; + unsigned char lba5; + unsigned char res[2]; +}; + +struct aoe_cfghdr { + u16 bufcnt; + u16 fwver; + unsigned char res; + unsigned char aoeccmd; + unsigned char cslen[2]; +}; + +enum { + DEVFL_UP = 1, /* device is installed in system and ready for AoE->ATA commands */ + DEVFL_TKILL = (1<<1), /* flag for timer to know when to kill self */ + DEVFL_EXT = (1<<2), /* device accepts lba48 commands */ + DEVFL_CLOSEWAIT = (1<<3), /* device is waiting for all closes to revalidate */ + DEVFL_WC_UPDATE = (1<<4), /* this device needs to update write cache status */ + DEVFL_WORKON = (1<<4), /* cleared by aoeblk_gdalloc when it finishes; NOTE(review): same bit as DEVFL_WC_UPDATE, confirm the overlap is intended */ + + BUFFL_FAIL = 1, +}; + +enum { + MAXATADATA = 1024, + NPERSHELF = 16, /* number of slots per shelf address */ + FREETAG = -1, /* frame.tag value of an unused frame slot */ + MIN_BUFS = 8, /* min_nr passed to mempool_create for a device's buf pool */ +}; + +struct buf { 
+ struct list_head bufs; + ulong start_time; /* for disk stats */ + ulong flags; + ulong nframesout; + char *bufaddr; + ulong resid; + ulong bv_resid; + sector_t sector; + struct bio *bio; + struct bio_vec *bv; +}; + +struct frame { + int tag; + ulong waited; + struct buf *buf; + char *bufaddr; + int writedatalen; + int ndata; + + /* largest possible */ + unsigned char data[sizeof(struct aoe_hdr) + sizeof(struct aoe_atahdr)]; +}; + +struct aoedev { + struct aoedev *next; + unsigned char addr[6]; /* remote mac addr */ + ushort flags; + ulong sysminor; + ulong aoemajor; + ulong aoeminor; + ulong nopen; /* (bd_openers isn't available without sleeping) */ + ulong rttavg; /* round trip average of requests/responses */ + u16 fw_ver; /* version of blade's firmware */ + struct work_struct work;/* disk create work struct */ + struct gendisk *gd; + request_queue_t blkq; + struct hd_geometry geo; + sector_t ssize; + struct timer_list timer; + spinlock_t lock; + struct net_device *ifp; /* interface ed is attached to */ + struct sk_buff *sendq_hd; /* packets needing to be sent, list head */ + struct sk_buff *sendq_tl; + mempool_t *bufpool; /* for deadlock-free Buf allocation */ + struct list_head bufq; /* queue of bios to work on */ + struct buf *inprocess; /* the one we're currently working on */ + ulong lasttag; /* last tag sent */ + ulong nframes; /* number of frames below */ + struct frame *frames; +}; + + +int aoeblk_init(void); +void aoeblk_exit(void); +void aoeblk_gdalloc(void *); +void aoedisk_rm_sysfs(struct aoedev *d); + +int aoechr_init(void); +void aoechr_exit(void); +void aoechr_error(char *); + +void aoecmd_work(struct aoedev *d); +void aoecmd_cfg(ushort, unsigned char); +void aoecmd_ata_rsp(struct sk_buff *); +void aoecmd_cfg_rsp(struct sk_buff *); + +int aoedev_init(void); +void aoedev_exit(void); +struct aoedev *aoedev_by_aoeaddr(int maj, int min); +void aoedev_downdev(struct aoedev *d); +struct aoedev *aoedev_set(ulong, unsigned char *, struct net_device *, 
ulong); +int aoedev_busy(void); + +int aoenet_init(void); +void aoenet_exit(void); +void aoenet_xmit(struct sk_buff *); +int is_aoe_netif(struct net_device *ifp); +int set_aoe_iflist(const char __user *str, size_t size); + +u64 mac_addr(char addr[6]); + +/* for compatibility with older 2.6 kernels lacking kcalloc + */ +extern void *aoe_kcalloc(size_t, size_t, int); --- linux-2.6.8.1-t044-driver-update/drivers/block/aoe/aoeblk.c 1970-01-01 03:00:00.000000000 +0300 +++ aoe/drivers/block/aoe/aoeblk.c 2005-09-29 18:30:39.000000000 +0400 @@ -0,0 +1,281 @@ +/* Copyright (c) 2004 Coraid, Inc. See COPYING for GPL terms. */ +/* + * aoeblk.c + * block device routines + */ + +#include +#include +#include +#include +#include +#include +#include "aoe.h" +#include "disk_attr.h" + +static kmem_cache_t *buf_pool_cache; + +static ssize_t aoedisk_show_state(struct gendisk * disk, char *page) +{ + struct aoedev *d = disk->private_data; + + return snprintf(page, PAGE_SIZE, + "%s%s\n", + (d->flags & DEVFL_UP) ? "up" : "down", + (d->flags & DEVFL_CLOSEWAIT) ? 
",closewait" : ""); +} +static ssize_t aoedisk_show_mac(struct gendisk * disk, char *page) +{ + struct aoedev *d = disk->private_data; + + return snprintf(page, PAGE_SIZE, "%012llx\n", + (unsigned long long)mac_addr(d->addr)); +} +static ssize_t aoedisk_show_netif(struct gendisk * disk, char *page) +{ + struct aoedev *d = disk->private_data; + + return snprintf(page, PAGE_SIZE, "%s\n", d->ifp->name); +} +/* firmware version */ +static ssize_t aoedisk_show_fwver(struct gendisk * disk, char *page) +{ + struct aoedev *d = disk->private_data; + + return snprintf(page, PAGE_SIZE, "0x%04x\n", (unsigned int) d->fw_ver); +} + +static struct disk_attribute disk_attr_state = { + .attr = {.name = "state", .mode = S_IRUGO }, + .show = aoedisk_show_state +}; +static struct disk_attribute disk_attr_mac = { + .attr = {.name = "mac", .mode = S_IRUGO }, + .show = aoedisk_show_mac +}; +static struct disk_attribute disk_attr_netif = { + .attr = {.name = "netif", .mode = S_IRUGO }, + .show = aoedisk_show_netif +}; +static struct disk_attribute disk_attr_fwver = { + .attr = {.name = "firmware-version", .mode = S_IRUGO }, + .show = aoedisk_show_fwver +}; + +static void +aoedisk_add_sysfs(struct aoedev *d) +{ /* publishes the four read-only attributes on the disk's kobject */ + sysfs_create_file(&d->gd->kobj, &disk_attr_state.attr); + sysfs_create_file(&d->gd->kobj, &disk_attr_mac.attr); + sysfs_create_file(&d->gd->kobj, &disk_attr_netif.attr); + sysfs_create_file(&d->gd->kobj, &disk_attr_fwver.attr); +} +void +aoedisk_rm_sysfs(struct aoedev *d) +{ /* removes exactly the attribute files created by aoedisk_add_sysfs; they are files, not links */ + sysfs_remove_file(&d->gd->kobj, &disk_attr_state.attr); + sysfs_remove_file(&d->gd->kobj, &disk_attr_mac.attr); + sysfs_remove_file(&d->gd->kobj, &disk_attr_netif.attr); + sysfs_remove_file(&d->gd->kobj, &disk_attr_fwver.attr); +} + +static int +aoeblk_open(struct inode *inode, struct file *filp) +{ /* counts the open in d->nopen when the device is up, otherwise fails with -ENODEV */ + struct aoedev *d; + ulong flags; + + d = inode->i_bdev->bd_disk->private_data; + + spin_lock_irqsave(&d->lock, flags); + if (d->flags & DEVFL_UP) { + d->nopen++; + spin_unlock_irqrestore(&d->lock, flags); + return 0; + } + spin_unlock_irqrestore(&d->lock, 
flags); + return -ENODEV; +} + +static int +aoeblk_release(struct inode *inode, struct file *filp) +{ /* on the last close of a device in CLOSEWAIT, clears the flag and sends a config query for it */ + struct aoedev *d; + ulong flags; + + d = inode->i_bdev->bd_disk->private_data; + + spin_lock_irqsave(&d->lock, flags); + + if (--d->nopen == 0 && (d->flags & DEVFL_CLOSEWAIT)) { + d->flags &= ~DEVFL_CLOSEWAIT; + spin_unlock_irqrestore(&d->lock, flags); + aoecmd_cfg(d->aoemajor, d->aoeminor); + return 0; + } + spin_unlock_irqrestore(&d->lock, flags); + + return 0; +} + +static int +aoeblk_make_request(request_queue_t *q, struct bio *bio) +{ /* wraps the bio in a struct buf, queues it on d->bufq, then transmits whatever aoecmd_work left on the send queue */ + struct aoedev *d; + struct buf *buf; + struct sk_buff *sl; + ulong flags; + + blk_queue_bounce(q, &bio); + + d = bio->bi_bdev->bd_disk->private_data; + buf = mempool_alloc(d->bufpool, GFP_NOIO); + if (buf == NULL) { + printk(KERN_INFO "aoe: aoeblk_make_request: buf allocation " + "failure\n"); + bio_endio(bio, bio->bi_size, -ENOMEM); + return 0; + } + memset(buf, 0, sizeof(*buf)); + INIT_LIST_HEAD(&buf->bufs); + buf->start_time = jiffies; + buf->bio = bio; + buf->resid = bio->bi_size; + buf->sector = bio->bi_sector; + buf->bv = buf->bio->bi_io_vec; + buf->bv_resid = buf->bv->bv_len; + buf->bufaddr = page_address(buf->bv->bv_page) + buf->bv->bv_offset; + + spin_lock_irqsave(&d->lock, flags); + + if ((d->flags & DEVFL_UP) == 0) { + printk(KERN_INFO "aoe: aoeblk_make_request: device %ld.%ld is not up\n", + d->aoemajor, d->aoeminor); + spin_unlock_irqrestore(&d->lock, flags); + mempool_free(buf, d->bufpool); + bio_endio(bio, bio->bi_size, -ENXIO); + return 0; + } + + list_add_tail(&buf->bufs, &d->bufq); + aoecmd_work(d); + + sl = d->sendq_hd; + d->sendq_hd = d->sendq_tl = NULL; + + spin_unlock_irqrestore(&d->lock, flags); + + aoenet_xmit(sl); /* sent after d->lock has been dropped */ + return 0; +} + +/* This ioctl implementation expects userland to have the device node + * permissions set so that only privileged users can open an aoe + * block device directly. 
+ */ +static int +aoeblk_ioctl(struct inode *inode, struct file *filp, uint cmd, ulong arg) +{ + struct aoedev *d; + + if (!arg) + return -EINVAL; + + d = inode->i_bdev->bd_disk->private_data; + if ((d->flags & DEVFL_UP) == 0) { + printk(KERN_ERR "aoe: aoeblk_ioctl: disk not up\n"); + return -ENODEV; + } + + if (cmd == HDIO_GETGEO) { + d->geo.start = get_start_sect(inode->i_bdev); + if (!copy_to_user((void __user *) arg, &d->geo, sizeof d->geo)) + return 0; + return -EFAULT; + } + printk(KERN_INFO "aoe: aoeblk_ioctl: unknown ioctl %d\n", cmd); + return -ENOTTY; /* for older kernels */ +} + +static struct block_device_operations aoe_bdops = { + .open = aoeblk_open, + .release = aoeblk_release, + .ioctl = aoeblk_ioctl, + .owner = THIS_MODULE, +}; + +/* alloc_disk and add_disk can sleep */ +void +aoeblk_gdalloc(void *vp) +{ + struct aoedev *d = vp; + struct gendisk *gd; + ulong flags; + + gd = alloc_disk(AOE_PARTITIONS); + if (gd == NULL) { + printk(KERN_ERR "aoe: aoeblk_gdalloc: cannot allocate disk " + "structure for %ld.%ld\n", d->aoemajor, d->aoeminor); + spin_lock_irqsave(&d->lock, flags); + d->flags &= ~DEVFL_WORKON; + spin_unlock_irqrestore(&d->lock, flags); + return; + } + + d->bufpool = mempool_create(MIN_BUFS, + mempool_alloc_slab, mempool_free_slab, + buf_pool_cache); + if (d->bufpool == NULL) { + printk(KERN_ERR "aoe: aoeblk_gdalloc: cannot allocate bufpool " + "for %ld.%ld\n", d->aoemajor, d->aoeminor); + put_disk(gd); + spin_lock_irqsave(&d->lock, flags); + d->flags &= ~DEVFL_WORKON; + spin_unlock_irqrestore(&d->lock, flags); + return; + } + + spin_lock_irqsave(&d->lock, flags); + blk_queue_make_request(&d->blkq, aoeblk_make_request); + gd->major = AOE_MAJOR; + gd->first_minor = d->sysminor * AOE_PARTITIONS; + gd->fops = &aoe_bdops; + gd->private_data = d; + gd->capacity = d->ssize; + snprintf(gd->disk_name, sizeof gd->disk_name, "etherd/e%ld.%ld", + d->aoemajor, d->aoeminor); + + gd->queue = &d->blkq; + d->gd = gd; + d->flags &= ~DEVFL_WORKON; + 
d->flags |= DEVFL_UP; + + spin_unlock_irqrestore(&d->lock, flags); + + add_disk(gd); + aoedisk_add_sysfs(d); + + printk(KERN_INFO "aoe: %012llx e%lu.%lu v%04x has %llu " + "sectors\n", (unsigned long long)mac_addr(d->addr), + d->aoemajor, d->aoeminor, + d->fw_ver, (unsigned long long)d->ssize); /* cast matches the %llu conversion */ +} + +void +aoeblk_exit(void) +{ + kmem_cache_destroy(buf_pool_cache); +} + +int __init +aoeblk_init(void) +{ /* creates the slab cache that backs each device's buf mempool; -ENOMEM on failure */ + buf_pool_cache = kmem_cache_create("aoe_bufs", + sizeof(struct buf), + 0, 0, NULL, NULL); + if (buf_pool_cache == NULL) + return -ENOMEM; + + return 0; +} + --- linux-2.6.8.1-t044-driver-update/drivers/block/aoe/aoechr.c 1970-01-01 03:00:00.000000000 +0300 +++ aoe/drivers/block/aoe/aoechr.c 2005-09-29 18:30:39.000000000 +0400 @@ -0,0 +1,245 @@ +/* Copyright (c) 2004 Coraid, Inc. See COPYING for GPL terms. */ +/* + * aoechr.c + * AoE character device driver + */ + +#include +#include +#include +#include "aoe.h" + +enum { + //MINOR_STAT = 1, (moved to sysfs) + MINOR_ERR = 2, + MINOR_DISCOVER, + MINOR_INTERFACES, + MSGSZ = 2048, + NARGS = 10, + NMSG = 100, /* message backlog to retain */ +}; + +struct aoe_chardev { + ulong minor; + char name[32]; +}; + +enum { EMFL_VALID = 1 }; + +struct ErrMsg { + short flags; + short len; + char *msg; +}; + +static struct ErrMsg emsgs[NMSG]; +static int emsgs_head_idx, emsgs_tail_idx; +static struct semaphore emsgs_sema; +static spinlock_t emsgs_lock; +static int nblocked_emsgs_readers; +static struct class *aoe_class; +static struct aoe_chardev chardevs[] = { + { MINOR_ERR, "err" }, + { MINOR_DISCOVER, "discover" }, + { MINOR_INTERFACES, "interfaces" }, +}; + +static int +discover(void) +{ + aoecmd_cfg(0xffff, 0xff); + return 0; +} + +static int +interfaces(const char __user *str, size_t size) +{ + if (set_aoe_iflist(str, size)) { + printk(KERN_CRIT + "%s: could not set interface list: %s\n", + __FUNCTION__, "too many interfaces"); + return -EINVAL; + } + return 0; +} + +void +aoechr_error(char *msg) +{ /* queues a copy of msg in the emsgs ring; the message is dropped when the tail slot is still occupied or the allocation fails */ + struct ErrMsg *em; + char *mp; + 
ulong flags, n; + + n = strlen(msg); + + spin_lock_irqsave(&emsgs_lock, flags); + + em = emsgs + emsgs_tail_idx; + if ((em->flags & EMFL_VALID)) { +bail: spin_unlock_irqrestore(&emsgs_lock, flags); + return; + } + + mp = kmalloc(n, GFP_ATOMIC); + if (mp == NULL) { + printk(KERN_CRIT "aoe: aoechr_error: allocation failure, len=%lu\n", n); + goto bail; + } + + memcpy(mp, msg, n); /* stored without a terminating NUL; em->len carries the length */ + em->msg = mp; + em->flags |= EMFL_VALID; + em->len = n; + + emsgs_tail_idx++; + emsgs_tail_idx %= ARRAY_SIZE(emsgs); + + spin_unlock_irqrestore(&emsgs_lock, flags); + + if (nblocked_emsgs_readers) + up(&emsgs_sema); +} + +static ssize_t +aoechr_write(struct file *filp, const char __user *buf, size_t cnt, loff_t *offp) +{ + int ret = -EINVAL; + + switch ((unsigned long) filp->private_data) { + default: + printk(KERN_INFO "aoe: aoechr_write: can't write to that file.\n"); + break; + case MINOR_DISCOVER: + ret = discover(); + break; + case MINOR_INTERFACES: + ret = interfaces(buf, cnt); + break; + } + if (ret == 0) + ret = cnt; + return ret; +} + +static int +aoechr_open(struct inode *inode, struct file *filp) +{ + int n, i; + + n = MINOR(inode->i_rdev); + filp->private_data = (void *) (unsigned long) n; + + for (i = 0; i < ARRAY_SIZE(chardevs); ++i) + if (chardevs[i].minor == n) + return 0; + return -EINVAL; +} + +static int +aoechr_rel(struct inode *inode, struct file *filp) +{ + return 0; +} + +static ssize_t +aoechr_read(struct file *filp, char __user *buf, size_t cnt, loff_t *off) +{ + unsigned long n; + char *mp; + struct ErrMsg *em; + ssize_t len; + ulong flags; + + n = (unsigned long) filp->private_data; + switch (n) { + case MINOR_ERR: + spin_lock_irqsave(&emsgs_lock, flags); +loop: + em = emsgs + emsgs_head_idx; + if ((em->flags & EMFL_VALID) == 0) { + if (filp->f_flags & O_NDELAY) { + spin_unlock_irqrestore(&emsgs_lock, flags); + return -EAGAIN; + } + nblocked_emsgs_readers++; + + spin_unlock_irqrestore(&emsgs_lock, flags); + + n = down_interruptible(&emsgs_sema); + + 
spin_lock_irqsave(&emsgs_lock, flags); + + nblocked_emsgs_readers--; + + if (n) { + spin_unlock_irqrestore(&emsgs_lock, flags); + return -ERESTARTSYS; + } + goto loop; + } + if (em->len > cnt) { + spin_unlock_irqrestore(&emsgs_lock, flags); + return -EAGAIN; + } + mp = em->msg; + len = em->len; + em->msg = NULL; + em->flags &= ~EMFL_VALID; + + emsgs_head_idx++; + emsgs_head_idx %= ARRAY_SIZE(emsgs); + + spin_unlock_irqrestore(&emsgs_lock, flags); + + n = copy_to_user(buf, mp, len); + kfree(mp); + return n == 0 ? len : -EFAULT; + default: + return -EFAULT; + } +} + +static struct file_operations aoe_fops = { + .write = aoechr_write, + .read = aoechr_read, + .open = aoechr_open, + .release = aoechr_rel, + .owner = THIS_MODULE, +}; + +int __init +aoechr_init(void) +{ + int n, i; + + n = register_chrdev(AOE_MAJOR, "aoechr", &aoe_fops); + if (n < 0) { + printk(KERN_ERR "aoe: aoechr_init: can't register char device\n"); + return n; + } + sema_init(&emsgs_sema, 0); + spin_lock_init(&emsgs_lock); + aoe_class = class_create(THIS_MODULE, "aoe"); + if (IS_ERR(aoe_class)) { + unregister_chrdev(AOE_MAJOR, "aoechr"); + return PTR_ERR(aoe_class); + } + for (i = 0; i < ARRAY_SIZE(chardevs); ++i) + class_device_create(aoe_class, + MKDEV(AOE_MAJOR, chardevs[i].minor), + NULL, chardevs[i].name); + + return 0; +} + +void +aoechr_exit(void) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(chardevs); ++i) + class_device_destroy(aoe_class, MKDEV(AOE_MAJOR, chardevs[i].minor)); + class_destroy(aoe_class); + unregister_chrdev(AOE_MAJOR, "aoechr"); +} + --- linux-2.6.8.1-t044-driver-update/drivers/block/aoe/disk_attr.h 1970-01-01 03:00:00.000000000 +0300 +++ aoe/drivers/block/aoe/disk_attr.h 2005-09-29 18:30:40.000000000 +0400 @@ -0,0 +1 @@ +/* struct disk_attribute is defined in kernel headers */ --- linux-2.6.8.1-t044-driver-update/drivers/block/Makefile 2005-10-25 15:30:35.202697120 +0400 +++ aoe/drivers/block/Makefile 2005-10-25 15:16:33.911592808 +0400 @@ -35,6 +35,7 @@ 
obj-$(CONFIG_BLK_DEV_XD) += xd.o obj-$(CONFIG_BLK_CPQ_DA) += cpqarray.o obj-$(CONFIG_BLK_CPQ_CISS_DA) += cciss.o obj-$(CONFIG_BLK_DEV_DAC960) += DAC960.o +obj-$(CONFIG_ATA_OVER_ETH) += aoe/ obj-$(CONFIG_BLK_DEV_UMEM) += umem.o obj-$(CONFIG_BLK_DEV_NBD) += nbd.o --- linux-2.6.8.1-t044-driver-update/drivers/block/Kconfig 2005-10-25 15:30:35.202697120 +0400 +++ aoe/drivers/block/Kconfig 2005-10-25 15:16:09.321331096 +0400 @@ -347,6 +347,13 @@ config LBD your machine, or if you want to have a raid or loopback device bigger than 2TB. Otherwise say N. +config ATA_OVER_ETH + tristate "ATA over Ethernet support" + help + AoE is a simple protocol used to package ATA commands and responses + for transmission over Ethernet. AoE also provides hosts with a method + for obtaining information about the EtherDrive blade. + source "drivers/s390/block/Kconfig" endmenu